megadetector 5.0.11__py3-none-any.whl → 5.0.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (203) hide show
  1. megadetector/api/__init__.py +0 -0
  2. megadetector/api/batch_processing/__init__.py +0 -0
  3. megadetector/api/batch_processing/api_core/__init__.py +0 -0
  4. megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. megadetector/api/batch_processing/api_core/batch_service/score.py +439 -0
  6. megadetector/api/batch_processing/api_core/server.py +294 -0
  7. megadetector/api/batch_processing/api_core/server_api_config.py +97 -0
  8. megadetector/api/batch_processing/api_core/server_app_config.py +55 -0
  9. megadetector/api/batch_processing/api_core/server_batch_job_manager.py +220 -0
  10. megadetector/api/batch_processing/api_core/server_job_status_table.py +149 -0
  11. megadetector/api/batch_processing/api_core/server_orchestration.py +360 -0
  12. megadetector/api/batch_processing/api_core/server_utils.py +88 -0
  13. megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
  14. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +46 -0
  15. megadetector/api/batch_processing/api_support/__init__.py +0 -0
  16. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +152 -0
  17. megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
  18. megadetector/api/batch_processing/integration/digiKam/setup.py +6 -0
  19. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +465 -0
  20. megadetector/api/batch_processing/integration/eMammal/test_scripts/config_template.py +5 -0
  21. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +125 -0
  22. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +55 -0
  23. megadetector/api/synchronous/__init__.py +0 -0
  24. megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  25. megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +152 -0
  26. megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +263 -0
  27. megadetector/api/synchronous/api_core/animal_detection_api/config.py +35 -0
  28. megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
  29. megadetector/api/synchronous/api_core/tests/load_test.py +110 -0
  30. megadetector/classification/__init__.py +0 -0
  31. megadetector/classification/aggregate_classifier_probs.py +108 -0
  32. megadetector/classification/analyze_failed_images.py +227 -0
  33. megadetector/classification/cache_batchapi_outputs.py +198 -0
  34. megadetector/classification/create_classification_dataset.py +627 -0
  35. megadetector/classification/crop_detections.py +516 -0
  36. megadetector/classification/csv_to_json.py +226 -0
  37. megadetector/classification/detect_and_crop.py +855 -0
  38. megadetector/classification/efficientnet/__init__.py +9 -0
  39. megadetector/classification/efficientnet/model.py +415 -0
  40. megadetector/classification/efficientnet/utils.py +607 -0
  41. megadetector/classification/evaluate_model.py +520 -0
  42. megadetector/classification/identify_mislabeled_candidates.py +152 -0
  43. megadetector/classification/json_to_azcopy_list.py +63 -0
  44. megadetector/classification/json_validator.py +699 -0
  45. megadetector/classification/map_classification_categories.py +276 -0
  46. megadetector/classification/merge_classification_detection_output.py +506 -0
  47. megadetector/classification/prepare_classification_script.py +194 -0
  48. megadetector/classification/prepare_classification_script_mc.py +228 -0
  49. megadetector/classification/run_classifier.py +287 -0
  50. megadetector/classification/save_mislabeled.py +110 -0
  51. megadetector/classification/train_classifier.py +827 -0
  52. megadetector/classification/train_classifier_tf.py +725 -0
  53. megadetector/classification/train_utils.py +323 -0
  54. megadetector/data_management/__init__.py +0 -0
  55. megadetector/data_management/annotations/__init__.py +0 -0
  56. megadetector/data_management/annotations/annotation_constants.py +34 -0
  57. megadetector/data_management/camtrap_dp_to_coco.py +237 -0
  58. megadetector/data_management/cct_json_utils.py +404 -0
  59. megadetector/data_management/cct_to_md.py +176 -0
  60. megadetector/data_management/cct_to_wi.py +289 -0
  61. megadetector/data_management/coco_to_labelme.py +283 -0
  62. megadetector/data_management/coco_to_yolo.py +662 -0
  63. megadetector/data_management/databases/__init__.py +0 -0
  64. megadetector/data_management/databases/add_width_and_height_to_db.py +33 -0
  65. megadetector/data_management/databases/combine_coco_camera_traps_files.py +206 -0
  66. megadetector/data_management/databases/integrity_check_json_db.py +493 -0
  67. megadetector/data_management/databases/subset_json_db.py +115 -0
  68. megadetector/data_management/generate_crops_from_cct.py +149 -0
  69. megadetector/data_management/get_image_sizes.py +189 -0
  70. megadetector/data_management/importers/add_nacti_sizes.py +52 -0
  71. megadetector/data_management/importers/add_timestamps_to_icct.py +79 -0
  72. megadetector/data_management/importers/animl_results_to_md_results.py +158 -0
  73. megadetector/data_management/importers/auckland_doc_test_to_json.py +373 -0
  74. megadetector/data_management/importers/auckland_doc_to_json.py +201 -0
  75. megadetector/data_management/importers/awc_to_json.py +191 -0
  76. megadetector/data_management/importers/bellevue_to_json.py +273 -0
  77. megadetector/data_management/importers/cacophony-thermal-importer.py +793 -0
  78. megadetector/data_management/importers/carrizo_shrubfree_2018.py +269 -0
  79. megadetector/data_management/importers/carrizo_trail_cam_2017.py +289 -0
  80. megadetector/data_management/importers/cct_field_adjustments.py +58 -0
  81. megadetector/data_management/importers/channel_islands_to_cct.py +913 -0
  82. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +180 -0
  83. megadetector/data_management/importers/eMammal/eMammal_helpers.py +249 -0
  84. megadetector/data_management/importers/eMammal/make_eMammal_json.py +223 -0
  85. megadetector/data_management/importers/ena24_to_json.py +276 -0
  86. megadetector/data_management/importers/filenames_to_json.py +386 -0
  87. megadetector/data_management/importers/helena_to_cct.py +283 -0
  88. megadetector/data_management/importers/idaho-camera-traps.py +1407 -0
  89. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +294 -0
  90. megadetector/data_management/importers/jb_csv_to_json.py +150 -0
  91. megadetector/data_management/importers/mcgill_to_json.py +250 -0
  92. megadetector/data_management/importers/missouri_to_json.py +490 -0
  93. megadetector/data_management/importers/nacti_fieldname_adjustments.py +79 -0
  94. megadetector/data_management/importers/noaa_seals_2019.py +181 -0
  95. megadetector/data_management/importers/pc_to_json.py +365 -0
  96. megadetector/data_management/importers/plot_wni_giraffes.py +123 -0
  97. megadetector/data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -0
  98. megadetector/data_management/importers/prepare_zsl_imerit.py +131 -0
  99. megadetector/data_management/importers/rspb_to_json.py +356 -0
  100. megadetector/data_management/importers/save_the_elephants_survey_A.py +320 -0
  101. megadetector/data_management/importers/save_the_elephants_survey_B.py +329 -0
  102. megadetector/data_management/importers/snapshot_safari_importer.py +758 -0
  103. megadetector/data_management/importers/snapshot_safari_importer_reprise.py +665 -0
  104. megadetector/data_management/importers/snapshot_serengeti_lila.py +1067 -0
  105. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +150 -0
  106. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +153 -0
  107. megadetector/data_management/importers/sulross_get_exif.py +65 -0
  108. megadetector/data_management/importers/timelapse_csv_set_to_json.py +490 -0
  109. megadetector/data_management/importers/ubc_to_json.py +399 -0
  110. megadetector/data_management/importers/umn_to_json.py +507 -0
  111. megadetector/data_management/importers/wellington_to_json.py +263 -0
  112. megadetector/data_management/importers/wi_to_json.py +442 -0
  113. megadetector/data_management/importers/zamba_results_to_md_results.py +181 -0
  114. megadetector/data_management/labelme_to_coco.py +547 -0
  115. megadetector/data_management/labelme_to_yolo.py +272 -0
  116. megadetector/data_management/lila/__init__.py +0 -0
  117. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +97 -0
  118. megadetector/data_management/lila/add_locations_to_nacti.py +147 -0
  119. megadetector/data_management/lila/create_lila_blank_set.py +558 -0
  120. megadetector/data_management/lila/create_lila_test_set.py +152 -0
  121. megadetector/data_management/lila/create_links_to_md_results_files.py +106 -0
  122. megadetector/data_management/lila/download_lila_subset.py +178 -0
  123. megadetector/data_management/lila/generate_lila_per_image_labels.py +516 -0
  124. megadetector/data_management/lila/get_lila_annotation_counts.py +170 -0
  125. megadetector/data_management/lila/get_lila_image_counts.py +112 -0
  126. megadetector/data_management/lila/lila_common.py +300 -0
  127. megadetector/data_management/lila/test_lila_metadata_urls.py +132 -0
  128. megadetector/data_management/ocr_tools.py +870 -0
  129. megadetector/data_management/read_exif.py +809 -0
  130. megadetector/data_management/remap_coco_categories.py +84 -0
  131. megadetector/data_management/remove_exif.py +66 -0
  132. megadetector/data_management/rename_images.py +187 -0
  133. megadetector/data_management/resize_coco_dataset.py +189 -0
  134. megadetector/data_management/wi_download_csv_to_coco.py +247 -0
  135. megadetector/data_management/yolo_output_to_md_output.py +446 -0
  136. megadetector/data_management/yolo_to_coco.py +676 -0
  137. megadetector/detection/__init__.py +0 -0
  138. megadetector/detection/detector_training/__init__.py +0 -0
  139. megadetector/detection/detector_training/model_main_tf2.py +114 -0
  140. megadetector/detection/process_video.py +846 -0
  141. megadetector/detection/pytorch_detector.py +355 -0
  142. megadetector/detection/run_detector.py +779 -0
  143. megadetector/detection/run_detector_batch.py +1219 -0
  144. megadetector/detection/run_inference_with_yolov5_val.py +1087 -0
  145. megadetector/detection/run_tiled_inference.py +934 -0
  146. megadetector/detection/tf_detector.py +192 -0
  147. megadetector/detection/video_utils.py +698 -0
  148. megadetector/postprocessing/__init__.py +0 -0
  149. megadetector/postprocessing/add_max_conf.py +64 -0
  150. megadetector/postprocessing/categorize_detections_by_size.py +165 -0
  151. megadetector/postprocessing/classification_postprocessing.py +716 -0
  152. megadetector/postprocessing/combine_api_outputs.py +249 -0
  153. megadetector/postprocessing/compare_batch_results.py +966 -0
  154. megadetector/postprocessing/convert_output_format.py +396 -0
  155. megadetector/postprocessing/load_api_results.py +195 -0
  156. megadetector/postprocessing/md_to_coco.py +310 -0
  157. megadetector/postprocessing/md_to_labelme.py +330 -0
  158. megadetector/postprocessing/merge_detections.py +412 -0
  159. megadetector/postprocessing/postprocess_batch_results.py +1908 -0
  160. megadetector/postprocessing/remap_detection_categories.py +170 -0
  161. megadetector/postprocessing/render_detection_confusion_matrix.py +660 -0
  162. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +211 -0
  163. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +83 -0
  164. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +1635 -0
  165. megadetector/postprocessing/separate_detections_into_folders.py +730 -0
  166. megadetector/postprocessing/subset_json_detector_output.py +700 -0
  167. megadetector/postprocessing/top_folders_to_bottom.py +223 -0
  168. megadetector/taxonomy_mapping/__init__.py +0 -0
  169. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +491 -0
  170. megadetector/taxonomy_mapping/map_new_lila_datasets.py +150 -0
  171. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -0
  172. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +588 -0
  173. megadetector/taxonomy_mapping/retrieve_sample_image.py +71 -0
  174. megadetector/taxonomy_mapping/simple_image_download.py +219 -0
  175. megadetector/taxonomy_mapping/species_lookup.py +834 -0
  176. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +159 -0
  177. megadetector/taxonomy_mapping/taxonomy_graph.py +346 -0
  178. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +83 -0
  179. megadetector/utils/__init__.py +0 -0
  180. megadetector/utils/azure_utils.py +178 -0
  181. megadetector/utils/ct_utils.py +613 -0
  182. megadetector/utils/directory_listing.py +246 -0
  183. megadetector/utils/md_tests.py +1164 -0
  184. megadetector/utils/path_utils.py +1045 -0
  185. megadetector/utils/process_utils.py +160 -0
  186. megadetector/utils/sas_blob_utils.py +509 -0
  187. megadetector/utils/split_locations_into_train_val.py +228 -0
  188. megadetector/utils/string_utils.py +92 -0
  189. megadetector/utils/url_utils.py +323 -0
  190. megadetector/utils/write_html_image_list.py +225 -0
  191. megadetector/visualization/__init__.py +0 -0
  192. megadetector/visualization/plot_utils.py +293 -0
  193. megadetector/visualization/render_images_with_thumbnails.py +275 -0
  194. megadetector/visualization/visualization_utils.py +1536 -0
  195. megadetector/visualization/visualize_db.py +552 -0
  196. megadetector/visualization/visualize_detector_output.py +405 -0
  197. {megadetector-5.0.11.dist-info → megadetector-5.0.13.dist-info}/LICENSE +0 -0
  198. {megadetector-5.0.11.dist-info → megadetector-5.0.13.dist-info}/METADATA +2 -2
  199. megadetector-5.0.13.dist-info/RECORD +201 -0
  200. megadetector-5.0.13.dist-info/top_level.txt +1 -0
  201. megadetector-5.0.11.dist-info/RECORD +0 -5
  202. megadetector-5.0.11.dist-info/top_level.txt +0 -1
  203. {megadetector-5.0.11.dist-info → megadetector-5.0.13.dist-info}/WHEEL +0 -0
@@ -0,0 +1,730 @@
1
+ r"""
2
+
3
+ separate_detections_into_folders.py
4
+
5
+ **Overview**
6
+
7
+ Given a .json file with batch processing results, separate the files in that
8
+ set of results into folders that contain animals/people/vehicles/nothing,
9
+ according to per-class thresholds.
10
+
11
+ Image files are copied, not moved, unless the move_images option is set.
12
+
13
+ **Output structure**
14
+
15
+ Preserves relative paths within each of those folders; cannot be used with .json
16
+ files that have absolute paths in them.
17
+
18
+ For example, if your .json file has these images:
19
+
20
+ * a/b/c/1.jpg
21
+ * a/b/d/2.jpg
22
+ * a/b/e/3.jpg
23
+ * a/b/f/4.jpg
24
+ * a/x/y/5.jpg
25
+
26
+ And let's say:
27
+
28
+ * The results say that the first three images are empty/person/vehicle, respectively
29
+ * The fourth image is above threshold for "animal" and "person"
30
+ * The fifth image contains an animal
31
+
32
+ * You specify an output base folder of c:/out
33
+
34
+ You will get the following files:
35
+
36
+ * c:/out/empty/a/b/c/1.jpg
37
+ * c:/out/people/a/b/d/2.jpg
38
+ * c:/out/vehicles/a/b/e/3.jpg
39
+ * c:/out/animal_person/a/b/f/4.jpg
40
+ * c:/out/animals/a/x/y/5.jpg
41
+
42
+ **Rendering bounding boxes**
43
+
44
+ By default, images are just copied to the target output folder. If you specify --render_boxes,
45
+ bounding boxes will be rendered on the output images. Because this is no longer strictly
46
+ a copy operation, this may result in the loss of metadata. More accurately, this *may*
47
+ result in the loss of some EXIF metadata; this *will* result in the loss of IPTC/XMP metadata.
48
+
49
+ Rendering boxes also makes this script a lot slower.
50
+
51
+ **Classification-based separation**
52
+
53
+ If you have a results file with classification data, you can also specify classes to put
54
+ in their own folders, within the "animals" folder, like this:
55
+
56
+ ``--classification_thresholds "deer=0.75,cow=0.75"``
57
+
58
+ So, e.g., you might get:
59
+
60
+ c:/out/animals/deer/a/x/y/5.jpg
61
+
62
+ In this scenario, the folders within "animals" will be:
63
+
64
+ deer, cow, multiple, unclassified
65
+
66
+ "multiple" in this case only means "deer and cow"; if an image is classified as containing a
67
+ bird and a bear, that would end up in "unclassified", since the folder separation is based only
68
+ on the categories you provide at the command line.
69
+
70
+ No classification-based separation is done within the animal_person, animal_vehicle, or
71
+ animal_person_vehicle folders.
72
+
73
+ """
74
+
75
+ #%% Constants and imports
76
+
77
+ import argparse
78
+ import json
79
+ import os
80
+ import shutil
81
+ import sys
82
+ import itertools
83
+
84
+ from multiprocessing.pool import ThreadPool
85
+ from functools import partial
86
+ from tqdm import tqdm
87
+
88
+ from megadetector.utils.ct_utils import args_to_object, is_float
89
+ from megadetector.detection.run_detector import get_typical_confidence_threshold_from_results
90
+ from megadetector.visualization import visualization_utils as vis_utils
91
+
92
friendly_folder_names = {'animal':'animals','person':'people','vehicle':'vehicles'}

# Occasionally we have near-zero confidence detections associated with COCO classes that
# didn't quite get squeezed out of the model in training.  As long as they're near zero
# confidence, we just ignore them.
invalid_category_epsilon = 0.00001

default_line_thickness = 8
default_box_expansion = 3


#%% Options class

class SeparateDetectionsIntoFoldersOptions:
    """
    Options used to parameterize separate_detections_into_folders()

    Args:
        threshold (float, optional): default confidence threshold, used for the
            'animal', 'person', and 'vehicle' entries in category_name_to_threshold.
            If this is None, those entries are left as None, and
            separate_detections_into_folders() fills them in with a default.
    """

    def __init__(self,threshold=None):

        #: Default threshold for categories not specified in category_name_to_threshold
        #
        # This has to be the constructor argument (not a hard-coded None), otherwise
        # SeparateDetectionsIntoFoldersOptions(threshold=x) silently discards x.
        self.threshold = threshold

        #: Dict mapping category names to thresholds; for example, an image with only a detection of class
        #: "animal" whose confidence is greater than or equal to category_name_to_threshold['animal']
        #: will be put in the "animal" folder.
        self.category_name_to_threshold = {
            'animal': self.threshold,
            'person': self.threshold,
            'vehicle': self.threshold
        }

        #: Number of workers to use, set to <= 1 to disable parallelization
        self.n_threads = 1

        #: By default, this function errors if you try to output to an existing folder
        self.allow_existing_directory = False

        #: By default, this function errors if any of the images specified in the results file don't
        #: exist in the source folder.
        self.allow_missing_files = False

        #: Whether to overwrite images that already exist in the target folder; only relevant if
        #: [allow_existing_directory] is True
        self.overwrite = True

        #: Whether to skip empty images; if this is False, empty images (i.e., images with no detections
        #: above the corresponding threshold) will be copied to an "empty" folder.
        self.skip_empty_images = False

        #: The MD results .json file to process
        self.results_file = None

        #: The folder containing source images; filenames in [results_file] should be relative to this
        #: folder.
        self.base_input_folder = None

        #: The folder to which we should write output images; see the module header comment for information
        #: about how that folder will be structured.
        self.base_output_folder = None

        #: Should we move rather than copy?
        self.move_images = False

        #: Should we render boxes on the output images?  Makes everything a lot slower.
        self.render_boxes = False

        #: Line thickness in pixels; only relevant if [render_boxes] is True
        self.line_thickness = default_line_thickness

        #: Box expansion in pixels; only relevant if [render_boxes] is True
        self.box_expansion = default_box_expansion

        #: Originally specified as a string that looks like this:
        #:
        #: deer=0.75,cow=0.75
        #:
        #: Converted internally to a dict mapping name:threshold
        self.classification_thresholds = None

        ## Debug or internal attributes

        #: Do not set explicitly; populated from data when using classification results
        self.classification_category_id_to_name = None

        #: Do not set explicitly; populated from data when using classification results
        self.classification_categories = None

        #: Used to test this script; sets a limit on the number of images to process.
        self.debug_max_images = None

        #: Do not set explicitly; this gets created based on [results_file]
        #:
        #: Dictionary mapping categories (plus combinations of categories, and 'empty') to output folders
        self.category_name_to_folder = None

        #: Do not set explicitly; this gets loaded from [results_file]
        self.category_id_to_category_name = None

        #: Do not set explicitly; this gets loaded from [results_file], and is used as the
        #: label map when rendering boxes
        self.detection_categories = None

    # ...__init__()

# ...class SeparateDetectionsIntoFoldersOptions
194
+
195
+
196
+ #%% Support functions
197
+
198
def _path_is_abs(p):
    """
    Determine whether [p] looks like an absolute path in either Unix or Windows
    notation, using only the string itself (not the conventions of the current OS).

    Args:
        p (str): the path to test

    Returns:
        bool: True if [p] starts with a slash or backslash (including a bare root
        and UNC-style paths), or has a colon in the second position (a Windows drive
        specifier); False otherwise, including for the empty string
    """

    if len(p) == 0:
        return False

    # Rooted paths: "/x", "/", "\x", "\\server\share"
    if p[0] in ('/','\\'):
        return True

    # Windows drive specifiers: "c:/x", "c:\x", "c:"
    return (len(p) > 1) and (p[1] == ':')
199
+
200
printed_missing_file_warning = False

def _get_classification_folder_name(detections,options):
    """
    Choose the classification subfolder name for an image whose only above-threshold
    detection category is "animal".

    Args:
        detections (list): the detection dicts for this image; each has 'category' and
            'conf', and may have 'classifications', a list whose elements are indexed
            as [classification category ID, confidence]
        options (SeparateDetectionsIntoFoldersOptions): uses category_id_to_category_name,
            category_name_to_threshold, classification_thresholds, and
            classification_category_id_to_name

    Returns:
        str: 'unclassified' if no category listed in options.classification_thresholds
        is above its threshold for any above-threshold animal detection, 'multiple' if
        more than one is, otherwise the name of the single category that is
    """

    # Find the animal-class detections that are above threshold
    category_name_to_id = {v: k for k, v in options.category_id_to_category_name.items()}
    animal_category_id = category_name_to_id['animal']
    animal_threshold = options.category_name_to_threshold['animal']
    valid_animal_detections = [d for d in detections if \
                               (d['category'] == animal_category_id and \
                                d['conf'] >= animal_threshold)]

    # Collect the classification categories that are above threshold for at
    # least one detection
    classification_categories_above_threshold = set()

    for d in valid_animal_detections:

        if 'classifications' not in d or d['classifications'] is None:
            continue

        for classification in d['classifications']:

            classification_category_id = classification[0]
            classification_confidence = classification[1]

            # Do we have a threshold for this category, and if so, is
            # this classification above threshold?
            assert options.classification_category_id_to_name is not None
            classification_category_name = \
                options.classification_category_id_to_name[classification_category_id]
            if (classification_category_name in options.classification_thresholds) and \
               (classification_confidence > \
                options.classification_thresholds[classification_category_name]):
                classification_categories_above_threshold.add(classification_category_name)

        # ...for each classification

    # ...for each detection

    if len(classification_categories_above_threshold) == 0:
        return 'unclassified'

    if len(classification_categories_above_threshold) > 1:
        return 'multiple'

    return next(iter(classification_categories_above_threshold))

# ...def _get_classification_folder_name()


def _process_detections(im,options):
    """
    Process all detections for a single image: decide which output folder the image
    belongs in, then copy or move it there, or render boxes on it and save the result.

    May modify *im*: when rendering boxes without classification folders, the
    'classifications' field is deleted from the rendered detections.

    Args:
        im (dict): an image entry from the results file; must contain 'file', and either
            a non-None 'detections' list or a non-empty 'failure' value
        options (SeparateDetectionsIntoFoldersOptions): options for this run; the
            category_name_to_folder, category_id_to_category_name, and
            detection_categories fields must already be populated

    Raises:
        ValueError: if the source image doesn't exist and options.allow_missing_files
            is False
    """

    global printed_missing_file_warning

    relative_filename = im['file']

    detections = None
    if 'detections' in im:
        detections = im['detections']

    # Stays None for failed images, which tells the code below to treat this as a
    # plain copy with no boxes to render.
    categories_above_threshold = None

    # This depends only on [options], so compute it before branching
    using_classification_folders = (options.classification_thresholds is not None and \
                                    len(options.classification_thresholds) > 0)

    if detections is None:

        assert im['failure'] is not None and len(im['failure']) > 0
        target_folder = options.category_name_to_folder['failure']

    else:

        category_names = options.category_id_to_category_name.values()
        category_name_to_max_confidence = \
            {category_name: 0.0 for category_name in category_names}

        # Find the maximum confidence for each category
        for det in detections:

            category_id = det['category']

            # For zero-confidence detections, we occasionally have leftover goop
            # from COCO classes
            if category_id not in options.category_id_to_category_name:
                print('Warning: unrecognized category {} in file {}'.format(
                    category_id,relative_filename))
                continue

            category_name = options.category_id_to_category_name[category_id]
            if det['conf'] > category_name_to_max_confidence[category_name]:
                category_name_to_max_confidence[category_name] = det['conf']

        # ...for each detection on this image

        # Find the categories whose thresholds were met
        categories_above_threshold = []
        for category_name in category_names:

            threshold = options.category_name_to_threshold[category_name]
            assert threshold is not None

            if category_name_to_max_confidence[category_name] >= threshold:
                categories_above_threshold.append(category_name)

        # ...for each category

        # Sort so that combination folder names (e.g. "animal_person") are deterministic
        categories_above_threshold.sort()

        if len(categories_above_threshold) > 1:

            # Currently "animal_person" images get put into the "animal_person" folder, even if we're
            # doing species-based separation.  Ideally, we would optionally put these in either the "deer"
            # folder or a "deer_person" folder, but this is pretty esoteric, so not worrying about this
            # for now.
            target_folder = options.category_name_to_folder['_'.join(categories_above_threshold)]

        elif len(categories_above_threshold) == 0:

            target_folder = options.category_name_to_folder['empty']

        else:

            assert len(categories_above_threshold) == 1

            target_folder = options.category_name_to_folder[categories_above_threshold[0]]

            # Are we making species classification folders, and is this an animal?
            if ('animal' in categories_above_threshold) and (using_classification_folders):
                classification_folder_name = \
                    _get_classification_folder_name(detections,options)
                target_folder = os.path.join(target_folder,classification_folder_name)

        # ...if we have 0/1/more categories above threshold

    # ...if this is/isn't a failure case

    source_path = os.path.join(options.base_input_folder,relative_filename)
    if not os.path.isfile(source_path):
        if not options.allow_missing_files:
            raise ValueError('Cannot find file {}'.format(source_path))
        else:
            # Only warn once, to avoid flooding the console
            if not printed_missing_file_warning:
                print('Warning: cannot find at least one file ({})'.format(source_path))
                printed_missing_file_warning = True
            return

    target_path = os.path.join(target_folder,relative_filename)
    if (not options.overwrite) and (os.path.isfile(target_path)):
        return

    is_empty_or_failed = \
        (categories_above_threshold is None) or (len(categories_above_threshold) == 0)

    # Skip this image if it's empty and we're not processing empty images.  This check
    # has to happen before we create the output folder, otherwise we litter the output
    # tree with empty folders for images we never write.
    if is_empty_or_failed and options.skip_empty_images:
        return

    target_dir = os.path.dirname(target_path)
    os.makedirs(target_dir,exist_ok=True)

    # At this point, this image is getting copied; we may or may not also need to
    # draw bounding boxes.

    # Do a simple copy operation if we don't need to render any boxes
    if (not options.render_boxes) or is_empty_or_failed:

        if options.move_images:
            shutil.move(source_path,target_path)
        else:
            shutil.copyfile(source_path,target_path)

    else:

        # Open the source image
        pil_image = vis_utils.load_image(source_path)

        # Render bounding boxes for each category separately, because
        # we allow different thresholds for each category.

        category_name_to_id = {v: k for k, v in options.category_id_to_category_name.items()}
        assert len(category_name_to_id) == len(options.category_id_to_category_name)

        classification_label_map = None
        if using_classification_folders:
            classification_label_map = options.classification_categories

        for category_name in categories_above_threshold:

            category_id = category_name_to_id[category_name]
            category_threshold = options.category_name_to_threshold[category_name]
            assert category_threshold is not None
            category_detections = [d for d in detections if d['category'] == category_id]

            # When we're not using classification folders, remove classification
            # information to maintain standard detection colors.
            if not using_classification_folders:
                for d in category_detections:
                    if 'classifications' in d:
                        del d['classifications']

            vis_utils.render_detection_bounding_boxes(
                category_detections,
                pil_image,
                label_map=options.detection_categories,
                classification_label_map=classification_label_map,
                confidence_threshold=category_threshold,
                thickness=options.line_thickness,
                expansion=options.box_expansion)

        # ...for each category

        # Try to preserve EXIF data and image quality when saving
        vis_utils.exif_preserving_save(pil_image,target_path)

    # ...if we don't/do need to render boxes

# ...def _process_detections()
430
+
431
+
432
+ #%% Main function
433
+
434
def separate_detections_into_folders(options):
    """
    Given a .json file with batch processing results, separate the files in that
    set of results into folders that contain animals/people/vehicles/nothing,
    according to per-class thresholds.  See the header comment of this module for
    more details about the output folder structure.

    This function mutates [options]: it fills in detection_categories,
    category_id_to_category_name, category_name_to_folder, and any None entries in
    category_name_to_threshold; if classification thresholds are supplied, it also
    sets classification_categories and classification_category_id_to_name, and
    replaces a string-formatted classification_thresholds with a dict.

    Args:
        options (SeparateDetectionsIntoFoldersOptions): parameters guiding image
            separation, see the SeparateDetectionsIntoFoldersOptions documentation for specific
            options.

    Raises:
        ValueError: if the output folder exists, is not empty, and
            options.allow_existing_directory is not set.
        AssertionError: if render_boxes and move_images are both requested, if the
            results contain absolute image paths, if a detection category has no entry
            in options.category_name_to_threshold, or if the classification thresholds
            are malformed or refer to categories that are not in the results file.
    """

    # Input validation

    # Currently we don't support moving (instead of copying) when we're also rendering
    # bounding boxes.
    assert not (options.render_boxes and options.move_images), \
        'Cannot specify both render_boxes and move_images'

    # Create output folder if necessary
    if (os.path.isdir(options.base_output_folder)) and \
        (len(os.listdir(options.base_output_folder)) > 0):
        if options.allow_existing_directory:
            print('Warning: target folder exists and is not empty... did ' + \
                  'you mean to delete an old version?')
        else:
            raise ValueError('Target folder exists and is not empty')
    os.makedirs(options.base_output_folder,exist_ok=True)

    # Load detection results; the context manager makes sure the file handle is
    # closed even if parsing fails.
    print('Loading detection results')
    with open(options.results_file) as f:
        results = json.load(f)
    images = results['images']

    # Output paths are built relative to the output folder, so absolute input
    # paths can't be handled.
    for im in images:
        fn = im['file']
        assert not _path_is_abs(fn), 'Cannot process results with absolute image paths'

    print('Processing detections for {} images'.format(len(images)))

    default_threshold = options.threshold

    # No explicit default threshold: derive one from the results file
    if default_threshold is None:
        default_threshold = get_typical_confidence_threshold_from_results(results)

    detection_categories = results['detection_categories']
    options.detection_categories = detection_categories
    options.category_id_to_category_name = detection_categories

    # Map class names to output folders
    options.category_name_to_folder = {}
    options.category_name_to_folder['empty'] = os.path.join(options.base_output_folder,'empty')
    options.category_name_to_folder['failure'] =\
        os.path.join(options.base_output_folder,'processing_failure')

    # Create all combinations of categories
    category_names = list(detection_categories.values())
    category_names.sort()

    # category_name = category_names[0]
    for category_name in category_names:

        # Do we have a custom threshold for this category?
        assert category_name in options.category_name_to_threshold
        if options.category_name_to_threshold[category_name] is None:
            options.category_name_to_threshold[category_name] = default_threshold

        category_threshold = options.category_name_to_threshold[category_name]
        print('Processing category {} at threshold {}'.format(category_name,category_threshold))

    # Every single category gets a folder, as does every combination of two or
    # more categories (names joined with '_', in sorted order).
    target_category_names = list(category_names)

    for combination_length in range(2,len(category_names)+1):
        target_category_names.extend(
            '_'.join(combination) for combination in
            itertools.combinations(category_names,combination_length))

    # Create folder mappings for each category
    for category_name in target_category_names:

        folder_name = category_name

        if category_name in friendly_folder_names:
            folder_name = friendly_folder_names[category_name]

        options.category_name_to_folder[category_name] = \
            os.path.join(options.base_output_folder,folder_name)

    # Create the actual folders
    for folder in options.category_name_to_folder.values():
        os.makedirs(folder,exist_ok=True)

    # Handle species classification thresholds, if specified
    if options.classification_thresholds is not None:

        assert 'classification_categories' in results and \
            results['classification_categories'] is not None, \
            'Classification thresholds specified, but no classification results available'

        classification_categories = results['classification_categories']
        classification_category_name_to_id = {v: k for k, v in classification_categories.items()}
        classification_category_id_to_name = {k: v for k, v in classification_categories.items()}
        options.classification_category_id_to_name = classification_category_id_to_name
        options.classification_categories = classification_categories

        if isinstance(options.classification_thresholds,str):

            # E.g. deer=0.75,cow=0.75
            tokens = options.classification_thresholds.split(',')
            classification_thresholds = {}

            # token = tokens[0]
            for token in tokens:
                subtokens = token.split('=')
                assert len(subtokens) == 2 and is_float(subtokens[1]), \
                    'Illegal classification threshold {}'.format(token)
                classification_thresholds[subtokens[0]] = float(subtokens[1])

            # ...for each token

            options.classification_thresholds = classification_thresholds

        # ...if classification thresholds are still in string format

        # Validate the classes in the threshold list
        for class_name in options.classification_thresholds.keys():
            assert class_name in classification_category_name_to_id, \
                'Category {} specified at the command line, but is not available in the results file'.format(
                    class_name)

    # ...if we need to deal with classification categories

    # Debug runs are always processed serially, so the image cap can be enforced
    if options.n_threads <= 1 or options.debug_max_images is not None:

        # i_image = 14; im = images[i_image]; im
        for i_image,im in enumerate(tqdm(images)):
            # i_image is zero-based, so stop once exactly debug_max_images images
            # have been processed.
            if options.debug_max_images is not None and i_image >= options.debug_max_images:
                break
            _process_detections(im,options)
        # ...for each image

    else:

        print('Starting a pool with {} threads'.format(options.n_threads))
        process_detections_with_options = partial(_process_detections, options=options)

        # The context manager tears the pool down when we're done (or if processing
        # raises), so worker threads don't outlive this call.  list() drains the
        # iterator, so all images have been processed before the pool is released.
        with ThreadPool(options.n_threads) as pool:
            _ = list(tqdm(pool.imap(process_detections_with_options, images), total=len(images)))

# ...def separate_detections_into_folders
590
+
591
+
592
+ #%% Interactive driver
593
+
594
if False:

    # Scratch cells for interactive (cell-by-cell) use in an IDE; nothing in this
    # block runs when the module is imported or executed as a script.
    pass

    #%%

    # Example configuration: detector-only results, rendering boxes
    options = SeparateDetectionsIntoFoldersOptions()

    options.n_threads = 100
    options.render_boxes = True
    options.allow_existing_directory = True
    options.results_file = os.path.expanduser(
        '~/data/snapshot-safari-2022-08-16-KRU-v5a.0.0_detections.json')
    options.base_input_folder = os.path.expanduser('~/data/KRU/KRU_public')
    options.base_output_folder = os.path.expanduser('~/data/KRU-separated')

    #%%

    # Example configuration: results with classifications, using per-species
    # thresholds
    options = SeparateDetectionsIntoFoldersOptions()

    options.n_threads = 100
    options.render_boxes = True
    options.allow_existing_directory = True
    options.classification_thresholds = 'deer=0.75,cow=0.75,bird=0.75'
    options.results_file = os.path.expanduser('~/data/ena24-2022-06-15-v5a.0.0_megaclassifier.json')
    options.base_input_folder = os.path.expanduser('~/data/ENA24/images')
    options.base_output_folder = os.path.expanduser('~/data/ENA24-separated')

    #%%

    # Run the separation with whichever options cell was executed last
    separate_detections_into_folders(options)

    #%% Testing various command-line invocations

    """
    # With boxes, no classification
    python separate_detections_into_folders.py ~/data/ena24-2022-06-15-v5a.0.0_megaclassifier.json ~/data/ENA24/images ~/data/ENA24-separated --threshold 0.17 --animal_threshold 0.2 --n_threads 10 --allow_existing_directory --render_boxes --line_thickness 10 --box_expansion 10

    # No boxes, no classification (default)
    python separate_detections_into_folders.py ~/data/ena24-2022-06-15-v5a.0.0_megaclassifier.json ~/data/ENA24/images ~/data/ENA24-separated --threshold 0.17 --animal_threshold 0.2 --n_threads 10 --allow_existing_directory

    # With boxes, with classification
    python separate_detections_into_folders.py ~/data/ena24-2022-06-15-v5a.0.0_megaclassifier.json ~/data/ENA24/images ~/data/ENA24-separated --threshold 0.17 --animal_threshold 0.2 --n_threads 10 --allow_existing_directory --render_boxes --line_thickness 10 --box_expansion 10 --classification_thresholds "deer=0.75,cow=0.75,bird=0.75"

    # No boxes, with classification
    python separate_detections_into_folders.py ~/data/ena24-2022-06-15-v5a.0.0_megaclassifier.json ~/data/ENA24/images ~/data/ENA24-separated --threshold 0.17 --animal_threshold 0.2 --n_threads 10 --allow_existing_directory --classification_thresholds "deer=0.75,cow=0.75,bird=0.75"
    """
641
+
642
+ #%% Command-line driver
643
+
644
def main():
    """
    Command-line driver for separate_detections_into_folders().

    Parses the command line, copies the parsed arguments onto a
    SeparateDetectionsIntoFoldersOptions object, validates the confidence
    thresholds, and runs the separation.  Prints help and exits if no arguments
    are supplied.

    Raises:
        AssertionError: if any supplied threshold is outside [0,1].
        ValueError: if a default threshold is supplied along with explicit
            thresholds for all three of animal/human/vehicle, in which case the
            default would never be used.
    """

    parser = argparse.ArgumentParser()
    parser.add_argument('results_file', type=str, help='Input .json filename')
    parser.add_argument('base_input_folder', type=str, help='Input image folder')
    parser.add_argument('base_output_folder', type=str, help='Output image folder')

    parser.add_argument('--threshold', type=float, default=None,
                        help='Default confidence threshold for all categories (defaults to ' + \
                        'selection based on model version, other options may override this ' + \
                        'for specific categories)')
    parser.add_argument('--animal_threshold', type=float, default=None,
                        help='Confidence threshold for the animal category')
    parser.add_argument('--human_threshold', type=float, default=None,
                        help='Confidence threshold for the human category')
    parser.add_argument('--vehicle_threshold', type=float, default=None,
                        help='Confidence threshold for vehicle category')
    parser.add_argument('--classification_thresholds', type=str, default=None,
                        help='List of classification thresholds to use for species-based folder ' + \
                        'separation, formatted as, e.g., "deer=0.75,cow=0.75"')

    parser.add_argument('--n_threads', type=int, default=1,
                        help='Number of threads to use for parallel operation (default=1)')

    parser.add_argument('--allow_existing_directory', action='store_true',
                        help='Proceed even if the target directory exists and is not empty')
    parser.add_argument('--no_overwrite', action='store_true',
                        help='Skip images that already exist in the target folder, must also ' + \
                        'specify --allow_existing_directory')
    parser.add_argument('--skip_empty_images', action='store_true',
                        help='Do not copy empty images to the output folder')
    parser.add_argument('--move_images', action='store_true',
                        help='Move images (rather than copying) (not recommended if you have not ' + \
                        'backed up your data!)')

    parser.add_argument('--render_boxes', action='store_true',
                        help='Render bounding boxes on output images; may result in some ' + \
                        'metadata not being transferred')
    parser.add_argument('--line_thickness', type=int, default=default_line_thickness,
                        help='Line thickness (in pixels) for rendering, only meaningful if ' + \
                        'using render_boxes (defaults to {})'.format(
                            default_line_thickness))
    # The default here must be default_box_expansion, matching the value
    # advertised in the help text.
    parser.add_argument('--box_expansion', type=int, default=default_box_expansion,
                        help='Box expansion (in pixels) for rendering, only meaningful if ' + \
                        'using render_boxes (defaults to {})'.format(
                            default_box_expansion))

    # With no arguments at all, show usage rather than an argparse error
    if len(sys.argv[1:])==0:
        parser.print_help()
        parser.exit()

    args = parser.parse_args()

    # Convert to an options object
    options = SeparateDetectionsIntoFoldersOptions()

    args_to_object(args, options)

    def validate_threshold(v,name):
        # Thresholds are optional; only check the ones that were supplied
        # print('{} {}'.format(v,name))
        if v is not None:
            assert v >= 0.0 and v <= 1.0, \
                'Illegal {} threshold {}'.format(name,v)

    validate_threshold(args.threshold,'default')
    validate_threshold(args.animal_threshold,'animal')
    validate_threshold(args.vehicle_threshold,'vehicle')
    validate_threshold(args.human_threshold,'human')

    if args.threshold is not None:
        if args.animal_threshold is not None \
            and args.human_threshold is not None \
            and args.vehicle_threshold is not None:
            raise ValueError('Default threshold specified, but all category thresholds ' + \
                             'also specified... not exactly wrong, but it\'s likely that you ' + \
                             'meant something else.')

    # The "human" command-line threshold applies to the 'person' category
    options.category_name_to_threshold['animal'] = args.animal_threshold
    options.category_name_to_threshold['person'] = args.human_threshold
    options.category_name_to_threshold['vehicle'] = args.vehicle_threshold

    # The command line exposes the inverse of the options flag
    options.overwrite = (not args.no_overwrite)

    separate_detections_into_folders(options)
728
+
729
# Run the command-line driver only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()