megadetector 5.0.9__py3-none-any.whl → 5.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (226)
  1. {megadetector-5.0.9.dist-info → megadetector-5.0.11.dist-info}/LICENSE +0 -0
  2. {megadetector-5.0.9.dist-info → megadetector-5.0.11.dist-info}/METADATA +12 -11
  3. megadetector-5.0.11.dist-info/RECORD +5 -0
  4. megadetector-5.0.11.dist-info/top_level.txt +1 -0
  5. api/__init__.py +0 -0
  6. api/batch_processing/__init__.py +0 -0
  7. api/batch_processing/api_core/__init__.py +0 -0
  8. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  9. api/batch_processing/api_core/batch_service/score.py +0 -439
  10. api/batch_processing/api_core/server.py +0 -294
  11. api/batch_processing/api_core/server_api_config.py +0 -98
  12. api/batch_processing/api_core/server_app_config.py +0 -55
  13. api/batch_processing/api_core/server_batch_job_manager.py +0 -220
  14. api/batch_processing/api_core/server_job_status_table.py +0 -152
  15. api/batch_processing/api_core/server_orchestration.py +0 -360
  16. api/batch_processing/api_core/server_utils.py +0 -92
  17. api/batch_processing/api_core_support/__init__.py +0 -0
  18. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
  19. api/batch_processing/api_support/__init__.py +0 -0
  20. api/batch_processing/api_support/summarize_daily_activity.py +0 -152
  21. api/batch_processing/data_preparation/__init__.py +0 -0
  22. api/batch_processing/data_preparation/manage_local_batch.py +0 -2391
  23. api/batch_processing/data_preparation/manage_video_batch.py +0 -327
  24. api/batch_processing/integration/digiKam/setup.py +0 -6
  25. api/batch_processing/integration/digiKam/xmp_integration.py +0 -465
  26. api/batch_processing/integration/eMammal/test_scripts/config_template.py +0 -5
  27. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -126
  28. api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +0 -55
  29. api/batch_processing/postprocessing/__init__.py +0 -0
  30. api/batch_processing/postprocessing/add_max_conf.py +0 -64
  31. api/batch_processing/postprocessing/categorize_detections_by_size.py +0 -163
  32. api/batch_processing/postprocessing/combine_api_outputs.py +0 -249
  33. api/batch_processing/postprocessing/compare_batch_results.py +0 -958
  34. api/batch_processing/postprocessing/convert_output_format.py +0 -397
  35. api/batch_processing/postprocessing/load_api_results.py +0 -195
  36. api/batch_processing/postprocessing/md_to_coco.py +0 -310
  37. api/batch_processing/postprocessing/md_to_labelme.py +0 -330
  38. api/batch_processing/postprocessing/merge_detections.py +0 -401
  39. api/batch_processing/postprocessing/postprocess_batch_results.py +0 -1904
  40. api/batch_processing/postprocessing/remap_detection_categories.py +0 -170
  41. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +0 -661
  42. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +0 -211
  43. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +0 -82
  44. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +0 -1631
  45. api/batch_processing/postprocessing/separate_detections_into_folders.py +0 -731
  46. api/batch_processing/postprocessing/subset_json_detector_output.py +0 -696
  47. api/batch_processing/postprocessing/top_folders_to_bottom.py +0 -223
  48. api/synchronous/__init__.py +0 -0
  49. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  50. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -152
  51. api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -266
  52. api/synchronous/api_core/animal_detection_api/config.py +0 -35
  53. api/synchronous/api_core/animal_detection_api/data_management/annotations/annotation_constants.py +0 -47
  54. api/synchronous/api_core/animal_detection_api/detection/detector_training/copy_checkpoints.py +0 -43
  55. api/synchronous/api_core/animal_detection_api/detection/detector_training/model_main_tf2.py +0 -114
  56. api/synchronous/api_core/animal_detection_api/detection/process_video.py +0 -543
  57. api/synchronous/api_core/animal_detection_api/detection/pytorch_detector.py +0 -304
  58. api/synchronous/api_core/animal_detection_api/detection/run_detector.py +0 -627
  59. api/synchronous/api_core/animal_detection_api/detection/run_detector_batch.py +0 -1029
  60. api/synchronous/api_core/animal_detection_api/detection/run_inference_with_yolov5_val.py +0 -581
  61. api/synchronous/api_core/animal_detection_api/detection/run_tiled_inference.py +0 -754
  62. api/synchronous/api_core/animal_detection_api/detection/tf_detector.py +0 -165
  63. api/synchronous/api_core/animal_detection_api/detection/video_utils.py +0 -495
  64. api/synchronous/api_core/animal_detection_api/md_utils/azure_utils.py +0 -174
  65. api/synchronous/api_core/animal_detection_api/md_utils/ct_utils.py +0 -262
  66. api/synchronous/api_core/animal_detection_api/md_utils/directory_listing.py +0 -251
  67. api/synchronous/api_core/animal_detection_api/md_utils/matlab_porting_tools.py +0 -97
  68. api/synchronous/api_core/animal_detection_api/md_utils/path_utils.py +0 -416
  69. api/synchronous/api_core/animal_detection_api/md_utils/process_utils.py +0 -110
  70. api/synchronous/api_core/animal_detection_api/md_utils/sas_blob_utils.py +0 -509
  71. api/synchronous/api_core/animal_detection_api/md_utils/string_utils.py +0 -59
  72. api/synchronous/api_core/animal_detection_api/md_utils/url_utils.py +0 -144
  73. api/synchronous/api_core/animal_detection_api/md_utils/write_html_image_list.py +0 -226
  74. api/synchronous/api_core/animal_detection_api/md_visualization/visualization_utils.py +0 -841
  75. api/synchronous/api_core/tests/__init__.py +0 -0
  76. api/synchronous/api_core/tests/load_test.py +0 -110
  77. classification/__init__.py +0 -0
  78. classification/aggregate_classifier_probs.py +0 -108
  79. classification/analyze_failed_images.py +0 -227
  80. classification/cache_batchapi_outputs.py +0 -198
  81. classification/create_classification_dataset.py +0 -627
  82. classification/crop_detections.py +0 -516
  83. classification/csv_to_json.py +0 -226
  84. classification/detect_and_crop.py +0 -855
  85. classification/efficientnet/__init__.py +0 -9
  86. classification/efficientnet/model.py +0 -415
  87. classification/efficientnet/utils.py +0 -610
  88. classification/evaluate_model.py +0 -520
  89. classification/identify_mislabeled_candidates.py +0 -152
  90. classification/json_to_azcopy_list.py +0 -63
  91. classification/json_validator.py +0 -695
  92. classification/map_classification_categories.py +0 -276
  93. classification/merge_classification_detection_output.py +0 -506
  94. classification/prepare_classification_script.py +0 -194
  95. classification/prepare_classification_script_mc.py +0 -228
  96. classification/run_classifier.py +0 -286
  97. classification/save_mislabeled.py +0 -110
  98. classification/train_classifier.py +0 -825
  99. classification/train_classifier_tf.py +0 -724
  100. classification/train_utils.py +0 -322
  101. data_management/__init__.py +0 -0
  102. data_management/annotations/__init__.py +0 -0
  103. data_management/annotations/annotation_constants.py +0 -34
  104. data_management/camtrap_dp_to_coco.py +0 -238
  105. data_management/cct_json_utils.py +0 -395
  106. data_management/cct_to_md.py +0 -176
  107. data_management/cct_to_wi.py +0 -289
  108. data_management/coco_to_labelme.py +0 -272
  109. data_management/coco_to_yolo.py +0 -662
  110. data_management/databases/__init__.py +0 -0
  111. data_management/databases/add_width_and_height_to_db.py +0 -33
  112. data_management/databases/combine_coco_camera_traps_files.py +0 -206
  113. data_management/databases/integrity_check_json_db.py +0 -477
  114. data_management/databases/subset_json_db.py +0 -115
  115. data_management/generate_crops_from_cct.py +0 -149
  116. data_management/get_image_sizes.py +0 -188
  117. data_management/importers/add_nacti_sizes.py +0 -52
  118. data_management/importers/add_timestamps_to_icct.py +0 -79
  119. data_management/importers/animl_results_to_md_results.py +0 -158
  120. data_management/importers/auckland_doc_test_to_json.py +0 -372
  121. data_management/importers/auckland_doc_to_json.py +0 -200
  122. data_management/importers/awc_to_json.py +0 -189
  123. data_management/importers/bellevue_to_json.py +0 -273
  124. data_management/importers/cacophony-thermal-importer.py +0 -796
  125. data_management/importers/carrizo_shrubfree_2018.py +0 -268
  126. data_management/importers/carrizo_trail_cam_2017.py +0 -287
  127. data_management/importers/cct_field_adjustments.py +0 -57
  128. data_management/importers/channel_islands_to_cct.py +0 -913
  129. data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  130. data_management/importers/eMammal/eMammal_helpers.py +0 -249
  131. data_management/importers/eMammal/make_eMammal_json.py +0 -223
  132. data_management/importers/ena24_to_json.py +0 -275
  133. data_management/importers/filenames_to_json.py +0 -385
  134. data_management/importers/helena_to_cct.py +0 -282
  135. data_management/importers/idaho-camera-traps.py +0 -1407
  136. data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  137. data_management/importers/jb_csv_to_json.py +0 -150
  138. data_management/importers/mcgill_to_json.py +0 -250
  139. data_management/importers/missouri_to_json.py +0 -489
  140. data_management/importers/nacti_fieldname_adjustments.py +0 -79
  141. data_management/importers/noaa_seals_2019.py +0 -181
  142. data_management/importers/pc_to_json.py +0 -365
  143. data_management/importers/plot_wni_giraffes.py +0 -123
  144. data_management/importers/prepare-noaa-fish-data-for-lila.py +0 -359
  145. data_management/importers/prepare_zsl_imerit.py +0 -131
  146. data_management/importers/rspb_to_json.py +0 -356
  147. data_management/importers/save_the_elephants_survey_A.py +0 -320
  148. data_management/importers/save_the_elephants_survey_B.py +0 -332
  149. data_management/importers/snapshot_safari_importer.py +0 -758
  150. data_management/importers/snapshot_safari_importer_reprise.py +0 -665
  151. data_management/importers/snapshot_serengeti_lila.py +0 -1067
  152. data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  153. data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  154. data_management/importers/sulross_get_exif.py +0 -65
  155. data_management/importers/timelapse_csv_set_to_json.py +0 -490
  156. data_management/importers/ubc_to_json.py +0 -399
  157. data_management/importers/umn_to_json.py +0 -507
  158. data_management/importers/wellington_to_json.py +0 -263
  159. data_management/importers/wi_to_json.py +0 -441
  160. data_management/importers/zamba_results_to_md_results.py +0 -181
  161. data_management/labelme_to_coco.py +0 -548
  162. data_management/labelme_to_yolo.py +0 -272
  163. data_management/lila/__init__.py +0 -0
  164. data_management/lila/add_locations_to_island_camera_traps.py +0 -97
  165. data_management/lila/add_locations_to_nacti.py +0 -147
  166. data_management/lila/create_lila_blank_set.py +0 -557
  167. data_management/lila/create_lila_test_set.py +0 -151
  168. data_management/lila/create_links_to_md_results_files.py +0 -106
  169. data_management/lila/download_lila_subset.py +0 -177
  170. data_management/lila/generate_lila_per_image_labels.py +0 -515
  171. data_management/lila/get_lila_annotation_counts.py +0 -170
  172. data_management/lila/get_lila_image_counts.py +0 -111
  173. data_management/lila/lila_common.py +0 -300
  174. data_management/lila/test_lila_metadata_urls.py +0 -132
  175. data_management/ocr_tools.py +0 -874
  176. data_management/read_exif.py +0 -681
  177. data_management/remap_coco_categories.py +0 -84
  178. data_management/remove_exif.py +0 -66
  179. data_management/resize_coco_dataset.py +0 -189
  180. data_management/wi_download_csv_to_coco.py +0 -246
  181. data_management/yolo_output_to_md_output.py +0 -441
  182. data_management/yolo_to_coco.py +0 -676
  183. detection/__init__.py +0 -0
  184. detection/detector_training/__init__.py +0 -0
  185. detection/detector_training/model_main_tf2.py +0 -114
  186. detection/process_video.py +0 -703
  187. detection/pytorch_detector.py +0 -337
  188. detection/run_detector.py +0 -779
  189. detection/run_detector_batch.py +0 -1219
  190. detection/run_inference_with_yolov5_val.py +0 -917
  191. detection/run_tiled_inference.py +0 -935
  192. detection/tf_detector.py +0 -188
  193. detection/video_utils.py +0 -606
  194. docs/source/conf.py +0 -43
  195. md_utils/__init__.py +0 -0
  196. md_utils/azure_utils.py +0 -174
  197. md_utils/ct_utils.py +0 -612
  198. md_utils/directory_listing.py +0 -246
  199. md_utils/md_tests.py +0 -968
  200. md_utils/path_utils.py +0 -1044
  201. md_utils/process_utils.py +0 -157
  202. md_utils/sas_blob_utils.py +0 -509
  203. md_utils/split_locations_into_train_val.py +0 -228
  204. md_utils/string_utils.py +0 -92
  205. md_utils/url_utils.py +0 -323
  206. md_utils/write_html_image_list.py +0 -225
  207. md_visualization/__init__.py +0 -0
  208. md_visualization/plot_utils.py +0 -293
  209. md_visualization/render_images_with_thumbnails.py +0 -275
  210. md_visualization/visualization_utils.py +0 -1537
  211. md_visualization/visualize_db.py +0 -551
  212. md_visualization/visualize_detector_output.py +0 -406
  213. megadetector-5.0.9.dist-info/RECORD +0 -224
  214. megadetector-5.0.9.dist-info/top_level.txt +0 -8
  215. taxonomy_mapping/__init__.py +0 -0
  216. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +0 -491
  217. taxonomy_mapping/map_new_lila_datasets.py +0 -154
  218. taxonomy_mapping/prepare_lila_taxonomy_release.py +0 -142
  219. taxonomy_mapping/preview_lila_taxonomy.py +0 -591
  220. taxonomy_mapping/retrieve_sample_image.py +0 -71
  221. taxonomy_mapping/simple_image_download.py +0 -218
  222. taxonomy_mapping/species_lookup.py +0 -834
  223. taxonomy_mapping/taxonomy_csv_checker.py +0 -159
  224. taxonomy_mapping/taxonomy_graph.py +0 -346
  225. taxonomy_mapping/validate_lila_category_mappings.py +0 -83
  226. {megadetector-5.0.9.dist-info → megadetector-5.0.11.dist-info}/WHEEL +0 -0
@@ -1,731 +0,0 @@
1
- r"""
2
-
3
- separate_detections_into_folders.py
4
-
5
- **Overview**
6
-
7
- Given a .json file with batch processing results, separate the files in that
8
- set of results into folders that contain animals/people/vehicles/nothing,
9
- according to per-class thresholds.
10
-
11
- Image files are copied, not moved.
12
-
13
- **Output structure**
14
-
15
- Preserves relative paths within each of those folders; cannot be used with .json
16
- files that have absolute paths in them.
17
-
18
- For example, if your .json file has these images:
19
-
20
- * a/b/c/1.jpg
21
- * a/b/d/2.jpg
22
- * a/b/e/3.jpg
23
- * a/b/f/4.jpg
24
- * a/x/y/5.jpg
25
-
26
- And let's say:
27
-
28
- * The results say that the first three images are empty/person/vehicle, respectively
29
- * The fourth image is above threshold for "animal" and "person"
30
- * The fifth image contains an animal
31
-
32
- * You specify an output base folder of c:/out
33
-
34
- You will get the following files:
35
-
36
- * c:/out/empty/a/b/c/1.jpg
37
- * c:/out/people/a/b/d/2.jpg
38
- * c:/out/vehicles/a/b/e/3.jpg
39
- * c:/out/animal_person/a/b/f/4.jpg
40
- * c:/out/animals/a/x/y/5.jpg
41
-
42
- **Rendering bounding boxes**
43
-
44
- By default, images are just copied to the target output folder. If you specify --render_boxes,
45
- bounding boxes will be rendered on the output images. Because this is no longer strictly
46
- a copy operation, this may result in the loss of metadata. More accurately, this *may*
47
- result in the loss of some EXIF metadata; this *will* result in the loss of IPTC/XMP metadata.
48
-
49
- Rendering boxes also makes this script a lot slower.
50
-
51
- **Classification-based separation**
52
-
53
- If you have a results file with classification data, you can also specify classes to put
54
- in their own folders, within the "animals" folder, like this:
55
-
56
- ``--classification_thresholds "deer=0.75,cow=0.75"``
57
-
58
- So, e.g., you might get:
59
-
60
- c:/out/animals/deer/a/x/y/5.jpg
61
-
62
- In this scenario, the folders within "animals" will be:
63
-
64
- deer, cow, multiple, unclassified
65
-
66
- "multiple" in this case only means "deer and cow"; if an image is classified as containing a
67
- bird and a bear, that would end up in "unclassified", since the folder separation is based only
68
- on the categories you provide at the command line.
69
-
70
- No classification-based separation is done within the animal_person, animal_vehicle, or
71
- animal_person_vehicle folders.
72
-
73
- """
74
-
75
- #%% Constants and imports
76
-
77
- import argparse
78
- import json
79
- import os
80
- import shutil
81
- import sys
82
- import itertools
83
-
84
- from multiprocessing.pool import ThreadPool
85
- from functools import partial
86
- from tqdm import tqdm
87
-
88
- from md_utils.ct_utils import args_to_object, is_float
89
- from detection.run_detector import get_typical_confidence_threshold_from_results
90
-
91
- import md_visualization.visualization_utils as vis_utils
92
-
93
- friendly_folder_names = {'animal':'animals','person':'people','vehicle':'vehicles'}
94
-
95
- # Occasionally we have near-zero confidence detections associated with COCO classes that
96
- # didn't quite get squeezed out of the model in training. As long as they're near zero
97
- # confidence, we just ignore them.
98
- invalid_category_epsilon = 0.00001
99
-
100
default_line_thickness = 8
default_box_expansion = 3


#%% Options class

class SeparateDetectionsIntoFoldersOptions:
    """
    Options used to parameterize separate_detections_into_folders()
    """

    def __init__(self,threshold=None):
        """
        Args:
            threshold (float, optional): default confidence threshold applied to every
                category that does not get its own entry in category_name_to_threshold.
                If None, separate_detections_into_folders() chooses a default based on
                the results file.
        """

        #: Default threshold for categories not specified in category_name_to_threshold
        #:
        #: This was previously hard-coded to None, which silently discarded the
        #: constructor argument.
        self.threshold = threshold

        #: Dict mapping category names to thresholds; for example, an image with only a detection of class
        #: "animal" whose confidence is greater than or equal to category_name_to_threshold['animal']
        #: will be put in the "animal" folder.  Entries left as None are filled in with the default
        #: threshold by separate_detections_into_folders().
        self.category_name_to_threshold = {
            'animal': self.threshold,
            'person': self.threshold,
            'vehicle': self.threshold
        }

        #: Number of workers to use, set to <= 1 to disable parallelization
        self.n_threads = 1

        #: By default, this function errors if you try to output to an existing folder
        self.allow_existing_directory = False

        #: By default, this function errors if any of the images specified in the results file don't
        #: exist in the source folder.
        self.allow_missing_files = False

        #: Whether to overwrite images that already exist in the target folder; only relevant if
        #: [allow_existing_directory] is True
        self.overwrite = True

        #: Whether to skip empty images; if this is False, empty images (i.e., images with no detections
        #: above the corresponding threshold) will be copied to an "empty" folder.
        self.skip_empty_images = False

        #: The MD results .json file to process
        self.results_file = None

        #: The folder containing source images; filenames in [results_file] should be relative to this
        #: folder.
        self.base_input_folder = None

        #: The folder to which we should write output images; see the module header comment for information
        #: about how that folder will be structured.
        self.base_output_folder = None

        #: Should we move rather than copy?
        self.move_images = False

        #: Should we render boxes on the output images?  Makes everything a lot slower.
        self.render_boxes = False

        #: Line thickness in pixels; only relevant if [render_boxes] is True
        self.line_thickness = default_line_thickness

        #: Box expansion in pixels; only relevant if [render_boxes] is True
        self.box_expansion = default_box_expansion

        #: Originally specified as a string that looks like this:
        #:
        #: deer=0.75,cow=0.75
        #:
        #: Converted internally to a dict mapping name:threshold
        self.classification_thresholds = None

        ## Debug or internal attributes

        #: Do not set explicitly; populated from data when using classification results
        self.classification_category_id_to_name = None

        #: Do not set explicitly; populated from data when using classification results
        self.classification_categories = None

        #: Used to test this script; sets a limit on the number of images to process.
        self.debug_max_images = None

        #: Do not set explicitly; this gets created based on [results_file]
        #:
        #: Dictionary mapping categories (plus combinations of categories, and 'empty') to output folders
        self.category_name_to_folder = None

        #: Do not set explicitly; this gets loaded from [results_file]
        self.category_id_to_category_name = None

        #: Do not set explicitly; this gets loaded from [results_file] by
        #: separate_detections_into_folders(), and is used as the label map when rendering boxes.
        #: Declared here so the attribute always exists.
        self.detection_categories = None

    # ...__init__()

# ...class SeparateDetectionsIntoFoldersOptions
195
-
196
-
197
- #%% Support functions
198
-
199
def _path_is_abs(p):
    """
    Platform-independent check for whether [p] looks like an absolute path.

    Deliberately does not use os.path.isabs(), whose answer depends on the OS we
    happen to be running on; results files may have been generated on a different OS.

    Args:
        p (str): path to test

    Returns:
        bool: True if [p] starts with a slash or backslash (Unix root, Windows
        root-relative or UNC path), or has a colon in the second position (Windows
        drive letter); False otherwise, including for empty or None input.
    """

    if not p:
        return False

    # Rooted paths, including the single-character path "/"
    if p[0] in ('/','\\'):
        return True

    # Windows drive-letter paths, e.g. "c:/images"
    return (len(p) > 1) and (p[1] == ':')
200
-
201
# Set to True after the first "missing file" warning, so that warning is printed only once
printed_missing_file_warning = False


def _get_categories_above_threshold(detections,relative_filename,options):
    """
    Find the detection categories whose maximum confidence in one image meets the
    per-category threshold.

    Args:
        detections (list): detection dicts for one image; each is read for its
            'category' and 'conf' fields
        relative_filename (str): image filename, used only in warning messages
        options (SeparateDetectionsIntoFoldersOptions): read for
            category_id_to_category_name and category_name_to_threshold

    Returns:
        list: sorted list of category names at or above threshold (possibly empty)
    """

    category_names = list(options.category_id_to_category_name.values())
    category_name_to_max_confidence = {name:0.0 for name in category_names}

    # Find the maximum confidence for each category
    for det in detections:

        category_id = det['category']

        # For zero-confidence detections, we occasionally have leftover goop
        # from COCO classes; warn and ignore.
        if category_id not in options.category_id_to_category_name:
            print('Warning: unrecognized category {} in file {}'.format(
                category_id,relative_filename))
            continue

        category_name = options.category_id_to_category_name[category_id]
        if det['conf'] > category_name_to_max_confidence[category_name]:
            category_name_to_max_confidence[category_name] = det['conf']

    # ...for each detection on this image

    categories_above_threshold = []
    for category_name in category_names:

        threshold = options.category_name_to_threshold[category_name]
        assert threshold is not None

        if category_name_to_max_confidence[category_name] >= threshold:
            categories_above_threshold.append(category_name)

    # ...for each category

    categories_above_threshold.sort()
    return categories_above_threshold

# ...def _get_categories_above_threshold()


def _get_classification_folder_name(detections,options):
    """
    Choose the species subfolder for an image that is above threshold only for "animal".

    Only animal detections at or above the animal threshold are considered, and only
    classification categories present in options.classification_thresholds count.

    Args:
        detections (list): detection dicts for one image
        options (SeparateDetectionsIntoFoldersOptions): read for the category maps and
            classification thresholds

    Returns:
        str: 'unclassified' if no listed classification category is above threshold,
        'multiple' if more than one is, otherwise the single category name
    """

    category_name_to_id = {v: k for k, v in options.category_id_to_category_name.items()}
    animal_category_id = category_name_to_id['animal']
    animal_threshold = options.category_name_to_threshold['animal']

    # Classification categories that are above threshold for at least one detection
    classification_categories_above_threshold = set()

    for d in detections:

        # Only above-threshold animal detections are eligible
        if d['category'] != animal_category_id or d['conf'] < animal_threshold:
            continue

        if 'classifications' not in d or d['classifications'] is None:
            continue

        for classification in d['classifications']:

            # Each classification is indexed as [category_id, confidence]
            classification_category_id = classification[0]
            classification_confidence = classification[1]

            # Do we have a threshold for this category, and if so, is
            # this classification above threshold?
            assert options.classification_category_id_to_name is not None
            classification_category_name = \
                options.classification_category_id_to_name[classification_category_id]
            if (classification_category_name in options.classification_thresholds) and \
               (classification_confidence > \
                options.classification_thresholds[classification_category_name]):
                classification_categories_above_threshold.add(classification_category_name)

        # ...for each classification

    # ...for each detection

    if len(classification_categories_above_threshold) == 0:
        return 'unclassified'
    if len(classification_categories_above_threshold) > 1:
        return 'multiple'
    return next(iter(classification_categories_above_threshold))

# ...def _get_classification_folder_name()


def _process_detections(im,options):
    """
    Process all detections for a single image: choose its output folder, then
    copy, move, or render it there.

    May modify *im*: when boxes are rendered without classification folders,
    'classifications' entries are deleted from the rendered detections.

    Args:
        im (dict): one image entry from the results file; read for 'file',
            'detections', and (when there are no detections) 'failure'
        options (SeparateDetectionsIntoFoldersOptions): processing options, with
            category_name_to_folder and category_id_to_category_name already populated

    Raises:
        ValueError: if the source image is missing and options.allow_missing_files
            is False
    """

    global printed_missing_file_warning

    relative_filename = im['file']
    detections = im.get('detections')

    categories_above_threshold = None
    using_classification_folders = False

    if detections is None:

        # No detections means this should be a processing failure
        failure = im.get('failure')
        assert failure is not None and len(failure) > 0, \
            'Image {} has neither detections nor a failure string'.format(relative_filename)
        target_folder = options.category_name_to_folder['failure']

    else:

        categories_above_threshold = \
            _get_categories_above_threshold(detections,relative_filename,options)

        using_classification_folders = (options.classification_thresholds is not None and \
                                        len(options.classification_thresholds) > 0)

        # If this is above multiple thresholds
        if len(categories_above_threshold) > 1:

            # Currently "animal_person" images get put into the "animal_person" folder, even if we're
            # doing species-based separation.  Ideally, we would optionally put these in either the "deer"
            # folder or a "deer_person" folder, but this is pretty esoteric, so not worrying about this
            # for now.
            target_folder = options.category_name_to_folder['_'.join(categories_above_threshold)]

        elif len(categories_above_threshold) == 0:

            target_folder = options.category_name_to_folder['empty']

        else:

            target_folder = options.category_name_to_folder[categories_above_threshold[0]]

            # Are we making species classification folders, and is this an animal?
            if ('animal' in categories_above_threshold) and (using_classification_folders):
                target_folder = os.path.join(
                    target_folder,_get_classification_folder_name(detections,options))

        # ...if we have 0/1/more categories above threshold

    # ...if this is/isn't a failure case

    # Failures (None) and images with nothing above threshold are both treated as "empty"
    is_empty = (categories_above_threshold is None) or (len(categories_above_threshold) == 0)

    source_path = os.path.join(options.base_input_folder,relative_filename)
    if not os.path.isfile(source_path):
        if not options.allow_missing_files:
            raise ValueError('Cannot find file {}'.format(source_path))
        if not printed_missing_file_warning:
            print('Warning: cannot find at least one file ({})'.format(source_path))
            printed_missing_file_warning = True
        return

    target_path = os.path.join(target_folder,relative_filename)
    if (not options.overwrite) and (os.path.isfile(target_path)):
        return

    # Skip this image if it's empty and we're not processing empty images.  This check
    # happens *before* creating the target folder, so skipped images don't leave empty
    # folder trees behind.
    if is_empty and options.skip_empty_images:
        return

    os.makedirs(os.path.dirname(target_path),exist_ok=True)

    # At this point, this image is getting written; do a simple copy/move operation if
    # we don't need to render any boxes.
    if (not options.render_boxes) or is_empty:

        if options.move_images:
            shutil.move(source_path,target_path)
        else:
            shutil.copyfile(source_path,target_path)
        return

    # Open the source image
    pil_image = vis_utils.load_image(source_path)

    # Render bounding boxes for each category separately, because
    # we allow different thresholds for each category.

    category_name_to_id = {v: k for k, v in options.category_id_to_category_name.items()}

    # Category names need to be unique for the name-->ID mapping to be valid
    assert len(category_name_to_id) == len(options.category_id_to_category_name)

    classification_label_map = None
    if using_classification_folders:
        classification_label_map = options.classification_categories

    # detection_categories is attached to [options] by separate_detections_into_folders();
    # fall back to the ID-->name map it is set from if it's not present.
    detection_label_map = getattr(options,'detection_categories',None)
    if detection_label_map is None:
        detection_label_map = options.category_id_to_category_name

    for category_name in categories_above_threshold:

        category_id = category_name_to_id[category_name]
        category_threshold = options.category_name_to_threshold[category_name]
        assert category_threshold is not None
        category_detections = [d for d in detections if d['category'] == category_id]

        # When we're not using classification folders, remove classification
        # information to maintain standard detection colors.
        if not using_classification_folders:
            for d in category_detections:
                if 'classifications' in d:
                    del d['classifications']

        vis_utils.render_detection_bounding_boxes(
            category_detections,
            pil_image,
            label_map=detection_label_map,
            classification_label_map=classification_label_map,
            confidence_threshold=category_threshold,
            thickness=options.line_thickness,
            expansion=options.box_expansion)

    # ...for each category

    # Try to preserve EXIF data and image quality when saving
    vis_utils.exif_preserving_save(pil_image,target_path)

# ...def _process_detections()
431
-
432
-
433
- #%% Main function
434
-
435
def separate_detections_into_folders(options):
    """
    Given a .json file with batch processing results, separate the files in that
    set of results into folders that contain animals/people/vehicles/nothing,
    according to per-class thresholds.  See the header comment of this module for
    more details about the output folder structure.

    This function writes several derived fields back to [options] before processing
    images: detection_categories, category_id_to_category_name, category_name_to_folder,
    and (when classification thresholds are supplied) classification_categories,
    classification_category_id_to_name, and a parsed dict version of
    classification_thresholds.  Entries in category_name_to_threshold that are None
    are replaced with the default threshold.

    Args:
        options (SeparateDetectionsIntoFoldersOptions): parameters guiding image
            separation, see the SeparateDetectionsIntoFoldersOptions documentation for specific
            options.

    Raises:
        ValueError: if the output folder exists, is not empty, and
            options.allow_existing_directory is not set.
        AssertionError: if render_boxes and move_images are both set, if the results
            contain absolute image paths, if a detection category has no entry in
            options.category_name_to_threshold, or if the classification thresholds are
            malformed or refer to categories that are not in the results file.
    """

    # Input validation

    # Currently we don't support moving (instead of copying) when we're also rendering
    # bounding boxes.
    assert not (options.render_boxes and options.move_images), \
        'Cannot specify both render_boxes and move_images'

    # Create output folder if necessary
    if (os.path.isdir(options.base_output_folder)) and \
        (len(os.listdir(options.base_output_folder)) > 0):
        if options.allow_existing_directory:
            print('Warning: target folder exists and is not empty... did ' + \
                  'you mean to delete an old version?')
        else:
            raise ValueError('Target folder exists and is not empty')
    os.makedirs(options.base_output_folder,exist_ok=True)

    # Load detection results
    #
    # Use a context manager so the file handle is released as soon as parsing finishes.
    print('Loading detection results')
    with open(options.results_file) as f:
        results = json.load(f)
    images = results['images']

    for im in images:
        fn = im['file']
        assert not _path_is_abs(fn), 'Cannot process results with absolute image paths'

    print('Processing detections for {} images'.format(len(images)))

    default_threshold = options.threshold

    if default_threshold is None:
        default_threshold = get_typical_confidence_threshold_from_results(results)

    detection_categories = results['detection_categories']
    options.detection_categories = detection_categories
    options.category_id_to_category_name = detection_categories

    # Map class names to output folders
    options.category_name_to_folder = {}
    options.category_name_to_folder['empty'] = os.path.join(options.base_output_folder,'empty')
    options.category_name_to_folder['failure'] =\
        os.path.join(options.base_output_folder,'processing_failure')

    # Create all combinations of categories
    category_names = sorted(detection_categories.values())

    # category_name = category_names[0]
    for category_name in category_names:

        # Do we have a custom threshold for this category?
        assert category_name in options.category_name_to_threshold
        if options.category_name_to_threshold[category_name] is None:
            options.category_name_to_threshold[category_name] = default_threshold

        category_threshold = options.category_name_to_threshold[category_name]
        print('Processing category {} at threshold {}'.format(category_name,category_threshold))

    # Start with the individual categories, then add every combination of two or
    # more categories, named by joining the (sorted) category names with underscores.
    target_category_names = list(category_names)

    for combination_length in range(2,len(category_names)+1):

        for combination in itertools.combinations(category_names,combination_length):
            target_category_names.append('_'.join(combination))

    # Create folder mappings for each category
    for category_name in target_category_names:

        # Some categories are written to a friendlier folder name than the raw category name
        folder_name = friendly_folder_names.get(category_name,category_name)

        options.category_name_to_folder[category_name] = \
            os.path.join(options.base_output_folder,folder_name)

    # Create the actual folders
    for folder in options.category_name_to_folder.values():
        os.makedirs(folder,exist_ok=True)

    # Handle species classification thresholds, if specified
    if options.classification_thresholds is not None:

        assert 'classification_categories' in results and \
            results['classification_categories'] is not None, \
            'Classification thresholds specified, but no classification results available'

        classification_categories = results['classification_categories']
        classification_category_name_to_id = {v: k for k, v in classification_categories.items()}
        classification_category_id_to_name = dict(classification_categories)
        options.classification_category_id_to_name = classification_category_id_to_name
        options.classification_categories = classification_categories

        if isinstance(options.classification_thresholds,str):

            # E.g. deer=0.75,cow=0.75
            tokens = options.classification_thresholds.split(',')
            classification_thresholds = {}

            # token = tokens[0]
            for token in tokens:
                subtokens = token.split('=')
                assert len(subtokens) == 2 and is_float(subtokens[1]), \
                    'Illegal classification threshold {}'.format(token)
                classification_thresholds[subtokens[0]] = float(subtokens[1])

            # ...for each token

            options.classification_thresholds = classification_thresholds

        # ...if classification thresholds are still in string format

        # Validate the classes in the threshold list
        for class_name in options.classification_thresholds.keys():
            assert class_name in classification_category_name_to_id, \
                'Category {} specified at the command line, but is not available in the results file'.format(
                    class_name)

    # ...if we need to deal with classification categories

    # Debug runs (debug_max_images set) are always processed serially
    if options.n_threads <= 1 or options.debug_max_images is not None:

        # i_image = 14; im = images[i_image]; im
        for i_image,im in enumerate(tqdm(images)):
            # i_image is zero-based, so ">=" stops after exactly debug_max_images images
            if options.debug_max_images is not None and i_image >= options.debug_max_images:
                break
            _process_detections(im,options)
        # ...for each image

    else:

        print('Starting a pool with {} threads'.format(options.n_threads))
        pool = ThreadPool(options.n_threads)
        try:
            process_detections_with_options = partial(_process_detections, options=options)
            _ = list(tqdm(pool.imap(process_detections_with_options, images), total=len(images)))
        finally:
            # Always release the worker threads, even if processing an image raised
            pool.close()
            pool.join()

# ...def separate_detections_into_folders
591
-
592
-
593
- #%% Interactive driver
594
-
595
# Scratch cells for interactive (cell-by-cell) use.  Everything below is guarded
# by "if False", so none of it executes when this module is imported or run as
# a script.
if False:

    pass

    #%%

    # Example configuration: detection results only, with box rendering enabled
    options = SeparateDetectionsIntoFoldersOptions()

    options.results_file = os.path.expanduser(
        '~/data/snapshot-safari-2022-08-16-KRU-v5a.0.0_detections.json')
    options.base_input_folder = os.path.expanduser('~/data/KRU/KRU_public')
    options.base_output_folder = os.path.expanduser('~/data/KRU-separated')
    options.n_threads = 100
    options.render_boxes = True
    options.allow_existing_directory = True

    #%%

    # Example configuration: same idea, plus per-species classification thresholds.
    # Note that this cell replaces the [options] object created in the previous cell.
    options = SeparateDetectionsIntoFoldersOptions()

    options.results_file = os.path.expanduser('~/data/ena24-2022-06-15-v5a.0.0_megaclassifier.json')
    options.base_input_folder = os.path.expanduser('~/data/ENA24/images')
    options.base_output_folder = os.path.expanduser('~/data/ENA24-separated')
    options.n_threads = 100
    options.classification_thresholds = 'deer=0.75,cow=0.75,bird=0.75'
    options.render_boxes = True
    options.allow_existing_directory = True

    #%%

    # Run the separation with whichever [options] object was created most recently
    separate_detections_into_folders(options)

    #%% Testing various command-line invocations

    # The string below is never executed; it is a list of example command lines
    # covering the four combinations of box rendering and classification thresholds.
    """
    # With boxes, no classification
    python separate_detections_into_folders.py ~/data/ena24-2022-06-15-v5a.0.0_megaclassifier.json ~/data/ENA24/images ~/data/ENA24-separated --threshold 0.17 --animal_threshold 0.2 --n_threads 10 --allow_existing_directory --render_boxes --line_thickness 10 --box_expansion 10

    # No boxes, no classification (default)
    python separate_detections_into_folders.py ~/data/ena24-2022-06-15-v5a.0.0_megaclassifier.json ~/data/ENA24/images ~/data/ENA24-separated --threshold 0.17 --animal_threshold 0.2 --n_threads 10 --allow_existing_directory

    # With boxes, with classification
    python separate_detections_into_folders.py ~/data/ena24-2022-06-15-v5a.0.0_megaclassifier.json ~/data/ENA24/images ~/data/ENA24-separated --threshold 0.17 --animal_threshold 0.2 --n_threads 10 --allow_existing_directory --render_boxes --line_thickness 10 --box_expansion 10 --classification_thresholds "deer=0.75,cow=0.75,bird=0.75"

    # No boxes, with classification
    python separate_detections_into_folders.py ~/data/ena24-2022-06-15-v5a.0.0_megaclassifier.json ~/data/ENA24/images ~/data/ENA24-separated --threshold 0.17 --animal_threshold 0.2 --n_threads 10 --allow_existing_directory --classification_thresholds "deer=0.75,cow=0.75,bird=0.75"
    """
642
-
643
- #%% Command-line driver
644
-
645
def main():
    """
    Command-line driver for separate_detections_into_folders().

    Parses the command line into a SeparateDetectionsIntoFoldersOptions object,
    validates that all supplied thresholds are in [0,1], maps the per-category
    threshold arguments onto options.category_name_to_threshold, and then runs the
    separation.  Prints help and exits if no arguments are supplied.

    Raises:
        AssertionError: if a supplied threshold is outside [0,1].
        ValueError: if a default threshold is specified along with thresholds for
            all three categories (the default would never be used).
    """

    parser = argparse.ArgumentParser()
    parser.add_argument('results_file', type=str, help='Input .json filename')
    parser.add_argument('base_input_folder', type=str, help='Input image folder')
    parser.add_argument('base_output_folder', type=str, help='Output image folder')

    parser.add_argument('--threshold', type=float, default=None,
                        help='Default confidence threshold for all categories (defaults to ' + \
                            'selection based on model version, other options may override this ' + \
                            'for specific categories)')
    parser.add_argument('--animal_threshold', type=float, default=None,
                        help='Confidence threshold for the animal category')
    parser.add_argument('--human_threshold', type=float, default=None,
                        help='Confidence threshold for the human category')
    parser.add_argument('--vehicle_threshold', type=float, default=None,
                        help='Confidence threshold for vehicle category')
    parser.add_argument('--classification_thresholds', type=str, default=None,
                        help='List of classification thresholds to use for species-based folder ' + \
                            'separation, formatted as, e.g., "deer=0.75,cow=0.75"')

    parser.add_argument('--n_threads', type=int, default=1,
                        help='Number of threads to use for parallel operation (default=1)')

    parser.add_argument('--allow_existing_directory', action='store_true',
                        help='Proceed even if the target directory exists and is not empty')
    parser.add_argument('--no_overwrite', action='store_true',
                        help='Skip images that already exist in the target folder, must also ' + \
                            'specify --allow_existing_directory')
    parser.add_argument('--skip_empty_images', action='store_true',
                        help='Do not copy empty images to the output folder')
    parser.add_argument('--move_images', action='store_true',
                        help='Move images (rather than copying) (not recommended this if you have not ' + \
                            'backed up your data!)')

    parser.add_argument('--render_boxes', action='store_true',
                        help='Render bounding boxes on output images; may result in some ' + \
                            'metadata not being transferred')
    parser.add_argument('--line_thickness', type=int, default=default_line_thickness,
                        help='Line thickness (in pixels) for rendering, only meaningful if ' + \
                            'using render_boxes (defaults to {})'.format(
                            default_line_thickness))
    # The default here must be default_box_expansion (the value advertised in the help
    # text), not default_line_thickness.
    parser.add_argument('--box_expansion', type=int, default=default_box_expansion,
                        help='Box expansion (in pixels) for rendering, only meaningful if ' + \
                            'using render_boxes (defaults to {})'.format(
                            default_box_expansion))

    # With no arguments at all, show usage rather than an argparse error
    if len(sys.argv) <= 1:
        parser.print_help()
        parser.exit()

    args = parser.parse_args()

    # Convert to an options object
    options = SeparateDetectionsIntoFoldersOptions()

    args_to_object(args, options)

    def validate_threshold(v,name):
        # Thresholds are optional; only range-check the ones that were supplied
        if v is not None:
            assert v >= 0.0 and v <= 1.0, \
                'Illegal {} threshold {}'.format(name,v)

    validate_threshold(args.threshold,'default')
    validate_threshold(args.animal_threshold,'animal')
    validate_threshold(args.vehicle_threshold,'vehicle')
    validate_threshold(args.human_threshold,'human')

    if args.threshold is not None:
        if args.animal_threshold is not None \
            and args.human_threshold is not None \
            and args.vehicle_threshold is not None:
            raise ValueError('Default threshold specified, but all category thresholds ' + \
                             'also specified... not exactly wrong, but it\'s likely that you ' + \
                             'meant something else.')

    # The command line says "human", but the detection category is named "person"
    options.category_name_to_threshold['animal'] = args.animal_threshold
    options.category_name_to_threshold['person'] = args.human_threshold
    options.category_name_to_threshold['vehicle'] = args.vehicle_threshold

    options.overwrite = (not args.no_overwrite)

    separate_detections_into_folders(options)
729
-
730
- if __name__ == '__main__':
731
- main()