megadetector 5.0.9__py3-none-any.whl → 5.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (226) hide show
  1. {megadetector-5.0.9.dist-info → megadetector-5.0.11.dist-info}/LICENSE +0 -0
  2. {megadetector-5.0.9.dist-info → megadetector-5.0.11.dist-info}/METADATA +12 -11
  3. megadetector-5.0.11.dist-info/RECORD +5 -0
  4. megadetector-5.0.11.dist-info/top_level.txt +1 -0
  5. api/__init__.py +0 -0
  6. api/batch_processing/__init__.py +0 -0
  7. api/batch_processing/api_core/__init__.py +0 -0
  8. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  9. api/batch_processing/api_core/batch_service/score.py +0 -439
  10. api/batch_processing/api_core/server.py +0 -294
  11. api/batch_processing/api_core/server_api_config.py +0 -98
  12. api/batch_processing/api_core/server_app_config.py +0 -55
  13. api/batch_processing/api_core/server_batch_job_manager.py +0 -220
  14. api/batch_processing/api_core/server_job_status_table.py +0 -152
  15. api/batch_processing/api_core/server_orchestration.py +0 -360
  16. api/batch_processing/api_core/server_utils.py +0 -92
  17. api/batch_processing/api_core_support/__init__.py +0 -0
  18. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
  19. api/batch_processing/api_support/__init__.py +0 -0
  20. api/batch_processing/api_support/summarize_daily_activity.py +0 -152
  21. api/batch_processing/data_preparation/__init__.py +0 -0
  22. api/batch_processing/data_preparation/manage_local_batch.py +0 -2391
  23. api/batch_processing/data_preparation/manage_video_batch.py +0 -327
  24. api/batch_processing/integration/digiKam/setup.py +0 -6
  25. api/batch_processing/integration/digiKam/xmp_integration.py +0 -465
  26. api/batch_processing/integration/eMammal/test_scripts/config_template.py +0 -5
  27. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -126
  28. api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +0 -55
  29. api/batch_processing/postprocessing/__init__.py +0 -0
  30. api/batch_processing/postprocessing/add_max_conf.py +0 -64
  31. api/batch_processing/postprocessing/categorize_detections_by_size.py +0 -163
  32. api/batch_processing/postprocessing/combine_api_outputs.py +0 -249
  33. api/batch_processing/postprocessing/compare_batch_results.py +0 -958
  34. api/batch_processing/postprocessing/convert_output_format.py +0 -397
  35. api/batch_processing/postprocessing/load_api_results.py +0 -195
  36. api/batch_processing/postprocessing/md_to_coco.py +0 -310
  37. api/batch_processing/postprocessing/md_to_labelme.py +0 -330
  38. api/batch_processing/postprocessing/merge_detections.py +0 -401
  39. api/batch_processing/postprocessing/postprocess_batch_results.py +0 -1904
  40. api/batch_processing/postprocessing/remap_detection_categories.py +0 -170
  41. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +0 -661
  42. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +0 -211
  43. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +0 -82
  44. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +0 -1631
  45. api/batch_processing/postprocessing/separate_detections_into_folders.py +0 -731
  46. api/batch_processing/postprocessing/subset_json_detector_output.py +0 -696
  47. api/batch_processing/postprocessing/top_folders_to_bottom.py +0 -223
  48. api/synchronous/__init__.py +0 -0
  49. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  50. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -152
  51. api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -266
  52. api/synchronous/api_core/animal_detection_api/config.py +0 -35
  53. api/synchronous/api_core/animal_detection_api/data_management/annotations/annotation_constants.py +0 -47
  54. api/synchronous/api_core/animal_detection_api/detection/detector_training/copy_checkpoints.py +0 -43
  55. api/synchronous/api_core/animal_detection_api/detection/detector_training/model_main_tf2.py +0 -114
  56. api/synchronous/api_core/animal_detection_api/detection/process_video.py +0 -543
  57. api/synchronous/api_core/animal_detection_api/detection/pytorch_detector.py +0 -304
  58. api/synchronous/api_core/animal_detection_api/detection/run_detector.py +0 -627
  59. api/synchronous/api_core/animal_detection_api/detection/run_detector_batch.py +0 -1029
  60. api/synchronous/api_core/animal_detection_api/detection/run_inference_with_yolov5_val.py +0 -581
  61. api/synchronous/api_core/animal_detection_api/detection/run_tiled_inference.py +0 -754
  62. api/synchronous/api_core/animal_detection_api/detection/tf_detector.py +0 -165
  63. api/synchronous/api_core/animal_detection_api/detection/video_utils.py +0 -495
  64. api/synchronous/api_core/animal_detection_api/md_utils/azure_utils.py +0 -174
  65. api/synchronous/api_core/animal_detection_api/md_utils/ct_utils.py +0 -262
  66. api/synchronous/api_core/animal_detection_api/md_utils/directory_listing.py +0 -251
  67. api/synchronous/api_core/animal_detection_api/md_utils/matlab_porting_tools.py +0 -97
  68. api/synchronous/api_core/animal_detection_api/md_utils/path_utils.py +0 -416
  69. api/synchronous/api_core/animal_detection_api/md_utils/process_utils.py +0 -110
  70. api/synchronous/api_core/animal_detection_api/md_utils/sas_blob_utils.py +0 -509
  71. api/synchronous/api_core/animal_detection_api/md_utils/string_utils.py +0 -59
  72. api/synchronous/api_core/animal_detection_api/md_utils/url_utils.py +0 -144
  73. api/synchronous/api_core/animal_detection_api/md_utils/write_html_image_list.py +0 -226
  74. api/synchronous/api_core/animal_detection_api/md_visualization/visualization_utils.py +0 -841
  75. api/synchronous/api_core/tests/__init__.py +0 -0
  76. api/synchronous/api_core/tests/load_test.py +0 -110
  77. classification/__init__.py +0 -0
  78. classification/aggregate_classifier_probs.py +0 -108
  79. classification/analyze_failed_images.py +0 -227
  80. classification/cache_batchapi_outputs.py +0 -198
  81. classification/create_classification_dataset.py +0 -627
  82. classification/crop_detections.py +0 -516
  83. classification/csv_to_json.py +0 -226
  84. classification/detect_and_crop.py +0 -855
  85. classification/efficientnet/__init__.py +0 -9
  86. classification/efficientnet/model.py +0 -415
  87. classification/efficientnet/utils.py +0 -610
  88. classification/evaluate_model.py +0 -520
  89. classification/identify_mislabeled_candidates.py +0 -152
  90. classification/json_to_azcopy_list.py +0 -63
  91. classification/json_validator.py +0 -695
  92. classification/map_classification_categories.py +0 -276
  93. classification/merge_classification_detection_output.py +0 -506
  94. classification/prepare_classification_script.py +0 -194
  95. classification/prepare_classification_script_mc.py +0 -228
  96. classification/run_classifier.py +0 -286
  97. classification/save_mislabeled.py +0 -110
  98. classification/train_classifier.py +0 -825
  99. classification/train_classifier_tf.py +0 -724
  100. classification/train_utils.py +0 -322
  101. data_management/__init__.py +0 -0
  102. data_management/annotations/__init__.py +0 -0
  103. data_management/annotations/annotation_constants.py +0 -34
  104. data_management/camtrap_dp_to_coco.py +0 -238
  105. data_management/cct_json_utils.py +0 -395
  106. data_management/cct_to_md.py +0 -176
  107. data_management/cct_to_wi.py +0 -289
  108. data_management/coco_to_labelme.py +0 -272
  109. data_management/coco_to_yolo.py +0 -662
  110. data_management/databases/__init__.py +0 -0
  111. data_management/databases/add_width_and_height_to_db.py +0 -33
  112. data_management/databases/combine_coco_camera_traps_files.py +0 -206
  113. data_management/databases/integrity_check_json_db.py +0 -477
  114. data_management/databases/subset_json_db.py +0 -115
  115. data_management/generate_crops_from_cct.py +0 -149
  116. data_management/get_image_sizes.py +0 -188
  117. data_management/importers/add_nacti_sizes.py +0 -52
  118. data_management/importers/add_timestamps_to_icct.py +0 -79
  119. data_management/importers/animl_results_to_md_results.py +0 -158
  120. data_management/importers/auckland_doc_test_to_json.py +0 -372
  121. data_management/importers/auckland_doc_to_json.py +0 -200
  122. data_management/importers/awc_to_json.py +0 -189
  123. data_management/importers/bellevue_to_json.py +0 -273
  124. data_management/importers/cacophony-thermal-importer.py +0 -796
  125. data_management/importers/carrizo_shrubfree_2018.py +0 -268
  126. data_management/importers/carrizo_trail_cam_2017.py +0 -287
  127. data_management/importers/cct_field_adjustments.py +0 -57
  128. data_management/importers/channel_islands_to_cct.py +0 -913
  129. data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  130. data_management/importers/eMammal/eMammal_helpers.py +0 -249
  131. data_management/importers/eMammal/make_eMammal_json.py +0 -223
  132. data_management/importers/ena24_to_json.py +0 -275
  133. data_management/importers/filenames_to_json.py +0 -385
  134. data_management/importers/helena_to_cct.py +0 -282
  135. data_management/importers/idaho-camera-traps.py +0 -1407
  136. data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  137. data_management/importers/jb_csv_to_json.py +0 -150
  138. data_management/importers/mcgill_to_json.py +0 -250
  139. data_management/importers/missouri_to_json.py +0 -489
  140. data_management/importers/nacti_fieldname_adjustments.py +0 -79
  141. data_management/importers/noaa_seals_2019.py +0 -181
  142. data_management/importers/pc_to_json.py +0 -365
  143. data_management/importers/plot_wni_giraffes.py +0 -123
  144. data_management/importers/prepare-noaa-fish-data-for-lila.py +0 -359
  145. data_management/importers/prepare_zsl_imerit.py +0 -131
  146. data_management/importers/rspb_to_json.py +0 -356
  147. data_management/importers/save_the_elephants_survey_A.py +0 -320
  148. data_management/importers/save_the_elephants_survey_B.py +0 -332
  149. data_management/importers/snapshot_safari_importer.py +0 -758
  150. data_management/importers/snapshot_safari_importer_reprise.py +0 -665
  151. data_management/importers/snapshot_serengeti_lila.py +0 -1067
  152. data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  153. data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  154. data_management/importers/sulross_get_exif.py +0 -65
  155. data_management/importers/timelapse_csv_set_to_json.py +0 -490
  156. data_management/importers/ubc_to_json.py +0 -399
  157. data_management/importers/umn_to_json.py +0 -507
  158. data_management/importers/wellington_to_json.py +0 -263
  159. data_management/importers/wi_to_json.py +0 -441
  160. data_management/importers/zamba_results_to_md_results.py +0 -181
  161. data_management/labelme_to_coco.py +0 -548
  162. data_management/labelme_to_yolo.py +0 -272
  163. data_management/lila/__init__.py +0 -0
  164. data_management/lila/add_locations_to_island_camera_traps.py +0 -97
  165. data_management/lila/add_locations_to_nacti.py +0 -147
  166. data_management/lila/create_lila_blank_set.py +0 -557
  167. data_management/lila/create_lila_test_set.py +0 -151
  168. data_management/lila/create_links_to_md_results_files.py +0 -106
  169. data_management/lila/download_lila_subset.py +0 -177
  170. data_management/lila/generate_lila_per_image_labels.py +0 -515
  171. data_management/lila/get_lila_annotation_counts.py +0 -170
  172. data_management/lila/get_lila_image_counts.py +0 -111
  173. data_management/lila/lila_common.py +0 -300
  174. data_management/lila/test_lila_metadata_urls.py +0 -132
  175. data_management/ocr_tools.py +0 -874
  176. data_management/read_exif.py +0 -681
  177. data_management/remap_coco_categories.py +0 -84
  178. data_management/remove_exif.py +0 -66
  179. data_management/resize_coco_dataset.py +0 -189
  180. data_management/wi_download_csv_to_coco.py +0 -246
  181. data_management/yolo_output_to_md_output.py +0 -441
  182. data_management/yolo_to_coco.py +0 -676
  183. detection/__init__.py +0 -0
  184. detection/detector_training/__init__.py +0 -0
  185. detection/detector_training/model_main_tf2.py +0 -114
  186. detection/process_video.py +0 -703
  187. detection/pytorch_detector.py +0 -337
  188. detection/run_detector.py +0 -779
  189. detection/run_detector_batch.py +0 -1219
  190. detection/run_inference_with_yolov5_val.py +0 -917
  191. detection/run_tiled_inference.py +0 -935
  192. detection/tf_detector.py +0 -188
  193. detection/video_utils.py +0 -606
  194. docs/source/conf.py +0 -43
  195. md_utils/__init__.py +0 -0
  196. md_utils/azure_utils.py +0 -174
  197. md_utils/ct_utils.py +0 -612
  198. md_utils/directory_listing.py +0 -246
  199. md_utils/md_tests.py +0 -968
  200. md_utils/path_utils.py +0 -1044
  201. md_utils/process_utils.py +0 -157
  202. md_utils/sas_blob_utils.py +0 -509
  203. md_utils/split_locations_into_train_val.py +0 -228
  204. md_utils/string_utils.py +0 -92
  205. md_utils/url_utils.py +0 -323
  206. md_utils/write_html_image_list.py +0 -225
  207. md_visualization/__init__.py +0 -0
  208. md_visualization/plot_utils.py +0 -293
  209. md_visualization/render_images_with_thumbnails.py +0 -275
  210. md_visualization/visualization_utils.py +0 -1537
  211. md_visualization/visualize_db.py +0 -551
  212. md_visualization/visualize_detector_output.py +0 -406
  213. megadetector-5.0.9.dist-info/RECORD +0 -224
  214. megadetector-5.0.9.dist-info/top_level.txt +0 -8
  215. taxonomy_mapping/__init__.py +0 -0
  216. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +0 -491
  217. taxonomy_mapping/map_new_lila_datasets.py +0 -154
  218. taxonomy_mapping/prepare_lila_taxonomy_release.py +0 -142
  219. taxonomy_mapping/preview_lila_taxonomy.py +0 -591
  220. taxonomy_mapping/retrieve_sample_image.py +0 -71
  221. taxonomy_mapping/simple_image_download.py +0 -218
  222. taxonomy_mapping/species_lookup.py +0 -834
  223. taxonomy_mapping/taxonomy_csv_checker.py +0 -159
  224. taxonomy_mapping/taxonomy_graph.py +0 -346
  225. taxonomy_mapping/validate_lila_category_mappings.py +0 -83
  226. {megadetector-5.0.9.dist-info → megadetector-5.0.11.dist-info}/WHEEL +0 -0
@@ -1,516 +0,0 @@
1
- """
2
-
3
- crop_detections.py
4
-
5
- Given a detections JSON file from MegaDetector, crops the bounding boxes above
6
- a certain confidence threshold.
7
-
8
- This script takes as input a detections JSON file, usually the output of
9
- detection/run_tf_detector_batch.py or the output of the Batch API in the
10
- "Batch processing API output format".
11
-
12
- See https://github.com/agentmorris/MegaDetector/tree/master/api/batch_processing.
13
-
14
- The script can crop images that are either available locally or that need to be
15
- downloaded from an Azure Blob Storage container.
16
-
17
- We assume that no image contains over 100 bounding boxes, and we always save
18
- crops as RGB .jpg files for consistency. For each image, each bounding box is
19
- cropped and saved to a file with a suffix "___cropXX_mdvY.Y.jpg" added to the
20
- filename as the original image. "XX" ranges from "00" to "99" and "Y.Y"
21
- indicates the MegaDetector version. Based on the given confidence threshold, we
22
- may skip saving certain bounding box crops, but we still increment the bounding
23
- box number for skipped boxes.
24
-
25
- Example cropped image path (with MegaDetector bbox):
26
-
27
- "path/to/image.jpg___crop00_mdv4.1.jpg"
28
-
29
- By default, the images are cropped exactly per the given bounding box
30
- coordinates. However, if square crops are desired, pass the --square-crops
31
- flag. This will always generate a square crop whose size is the larger of the
32
- bounding box width or height. In the case that the square crop boundaries exceed
33
- the original image size, the crop is padded with 0s.
34
-
35
- This script outputs a log file to:
36
-
37
- <output_dir>/crop_detections_log_{timestamp}.json
38
-
39
- ...which contains images that failed to download and crop properly.
40
-
41
- """
42
-
43
- #%% Imports
44
-
45
- from __future__ import annotations
46
-
47
- import argparse
48
- from collections.abc import Iterable, Mapping, Sequence
49
- from concurrent import futures
50
- from datetime import datetime
51
- import io
52
- import json
53
- import os
54
- from typing import Any, BinaryIO, Optional
55
-
56
- from azure.storage.blob import ContainerClient
57
- from PIL import Image, ImageOps
58
- from tqdm import tqdm
59
-
60
-
61
- #%% Example usage
62
-
63
- """
64
- python crop_detections.py \
65
- detections.json \
66
- /path/to/crops \
67
- --images-dir /path/to/images \
68
- --container-url "https://account.blob.core.windows.net/container?sastoken" \
69
- --detector-version "4.1" \
70
- --threshold 0.8 \
71
- --save-full-images --square-crops \
72
- --threads 50 \
73
- --logdir "."
74
- """
75
-
76
-
77
- #%% Main function
78
-
79
def main(detections_json_path: str,
         cropped_images_dir: str,
         images_dir: Optional[str],
         container_url: Optional[str],
         detector_version: Optional[str],
         save_full_images: bool,
         square_crops: bool,
         check_crops_valid: bool,
         confidence_threshold: float,
         threads: int,
         logdir: str) -> None:
    """
    Crops the detections listed in a MegaDetector results file and writes a
    JSON log describing the images that could not be processed.

    Args:
        detections_json_path: str, path to detections JSON file
        cropped_images_dir: str, path to local directory for saving crops of
            bounding boxes
        images_dir: optional str, path to local directory where images are saved
        container_url: optional str, URL (with SAS token, if necessary) of Azure
            Blob Storage container to download images from, if images are not
            all already locally available in <images_dir>
        detector_version: optional str, detector version string, e.g., '4.1';
            if the JSON file also names a detector, the two must match, and if
            neither is available the string 'unknown' is used
        save_full_images: bool, whether to save downloaded images to images_dir,
            images_dir must be given if save_full_images=True
        square_crops: bool, whether to crop bounding boxes as squares
        check_crops_valid: bool, whether to load each crop to ensure the file is
            valid (i.e., not truncated)
        confidence_threshold: float, only crop bounding boxes above this value
        threads: int, number of threads to use for downloading images
        logdir: str, path to directory to save log file

    Raises:
        AssertionError: if confidence_threshold is outside [0, 1], if
            save_full_images is set without images_dir, or if the detector
            version in the JSON file disagrees with detector_version
    """

    # Error checking. The errors are raised explicitly (instead of via `assert`)
    # so that the checks still run under `python -O`, while callers keep seeing
    # the same exception type as before.
    if not 0 <= confidence_threshold <= 1:
        raise AssertionError(
            'Invalid confidence threshold {}'.format(confidence_threshold))
    if save_full_images:
        if images_dir is None:
            raise AssertionError(
                'save_full_images specified but no images_dir provided')
        if not os.path.exists(images_dir):
            os.makedirs(images_dir, exist_ok=True)
            print(f'Created images_dir at {images_dir}')

    # load detections JSON; JSON text is UTF-8, so do not depend on the locale
    with open(detections_json_path, 'r', encoding='utf-8') as f:
        js = json.load(f)
    detections = {img['file']: img for img in js['images']}
    detection_categories = js['detection_categories']

    # get detector version
    if 'info' in js and 'detector' in js['info']:
        api_det_version = js['info']['detector']
        if detector_version is None:
            detector_version = api_det_version
        elif api_det_version != detector_version:
            raise AssertionError(
                '.json file specifies a detector version of {}, but the caller has specified {}'.format(
                    api_det_version, detector_version))
    if detector_version is None:
        detector_version = 'unknown'

    images_missing_detections = []

    # Drop images without detections and convert every remaining detection from
    # category ID to category name. Iterate over a copy of the keys because
    # entries are deleted from the dict inside the loop.
    for img_path in list(detections.keys()):
        info_dict = detections[img_path]
        if info_dict.get('detections') is None:
            del detections[img_path]
            images_missing_detections.append(img_path)
            continue
        for d in info_dict['detections']:
            if d['category'] not in detection_categories:
                print('Warning: ignoring detection with category {} for image {}'.format(
                    d['category'], img_path))
                # This will be removed later when we filter for animals
                d['category'] = 'unsupported'
            else:
                d['category'] = detection_categories[d['category']]

    images_failed_dload_crop, num_downloads, num_crops = download_and_crop(
        detections=detections,
        cropped_images_dir=cropped_images_dir,
        images_dir=images_dir,
        container_url=container_url,
        detector_version=detector_version,
        confidence_threshold=confidence_threshold,
        save_full_images=save_full_images,
        square_crops=square_crops,
        check_crops_valid=check_crops_valid,
        threads=threads)
    print(f'{len(images_failed_dload_crop)} images failed to download or crop.')

    # save log of bad images
    log = {
        'images_missing_detections': images_missing_detections,
        'images_failed_download_or_crop': images_failed_dload_crop,
        'num_new_downloads': num_downloads,
        'num_new_crops': num_crops
    }
    os.makedirs(logdir, exist_ok=True)
    date = datetime.now().strftime('%Y%m%d_%H%M%S')  # e.g., '20200722_110816'
    log_path = os.path.join(logdir, f'crop_detections_log_{date}.json')
    with open(log_path, 'w', encoding='utf-8') as f:
        json.dump(log, f, indent=1)
183
-
184
-
185
- #%% Support functions
186
-
187
def download_and_crop(
        detections: Mapping[str, Mapping[str, Any]],
        cropped_images_dir: str,
        images_dir: Optional[str],
        container_url: Optional[str],
        detector_version: str,
        confidence_threshold: float,
        save_full_images: bool,
        square_crops: bool,
        check_crops_valid: bool,
        threads: int = 1
        ) -> tuple[list[str], int, int]:
    """
    Loads (or downloads) every image in <detections> on a thread pool and saves
    a crop for each qualifying bounding box.

    Crops are saved to a file with the same name as the original image with an
    additional suffix appended, starting with 3 underscores:
    - if image has ground truth bboxes: "___cropXX.jpg", where "XX" indicates
        the bounding box index
    - if image has bboxes from MegaDetector: "___cropXX_<detector_version>.jpg",
        where <detector_version> is the string passed to this function
    See module docstring for more info and examples.

    Args:
        detections: dict, maps image paths to info dict
            {
                "detections": [{
                    "category": "animal",  # must be name, not "1" or "2"
                    "conf": 0.926,
                    "bbox": [0.0, 0.2762, 0.1539, 0.2825],
                }],
                "is_ground_truth": True  # whether bboxes are ground truth
            }
        cropped_images_dir: str, path to folder where cropped images are saved
        images_dir: optional str, path to folder where full images are saved
        container_url: optional str, URL (with SAS token, if necessary) of Azure
            Blob Storage container to download images from, if images are not
            all already locally available in <images_dir>
        detector_version: str, detector version string, e.g., '4.1'
        confidence_threshold: float, only crop bounding boxes above this value
        save_full_images: bool, whether to save downloaded images to images_dir,
            images_dir must be given and must exist if save_full_images=True
        square_crops: bool, whether to crop bounding boxes as squares
        check_crops_valid: bool, whether to load each crop to ensure the file is
            valid (i.e., not truncated)
        threads: int, number of threads to use for downloading images

    Returns:
        images_failed_download: list of str, images with bounding boxes that
            failed to download or crop properly
        total_downloads: int, number of images downloaded
        total_new_crops: int, number of new crops saved to cropped_images_dir
    """

    # True for ground truth, False for MegaDetector
    # always save as .jpg for consistency
    crop_path_template = {
        True: os.path.join(cropped_images_dir, '{img_path}___crop{n:>02d}.jpg'),
        False: os.path.join(
            cropped_images_dir,
            '{img_path}___crop{n:>02d}_' + f'{detector_version}.jpg')
    }

    images_failed_download: list[str] = []
    total_downloads = 0
    total_new_crops = 0

    container_client = None
    if container_url is not None:
        container_client = ContainerClient.from_container_url(container_url)

    # The try/finally and the executor context manager guarantee that worker
    # threads and the client's connections are released even if an exception
    # (e.g. KeyboardInterrupt) propagates out of the loops below.
    try:
        with futures.ThreadPoolExecutor(max_workers=threads) as pool:
            future_to_img_path = {}

            print(f'Getting bbox info for {len(detections)} images...')
            for img_path in tqdm(sorted(detections)):
                # we already did all error checking above, so we don't do any here
                info_dict = detections[img_path]
                bbox_dicts = info_dict['detections']
                is_ground_truth = info_dict.get('is_ground_truth', False)

                # get the image, either from disk or from Blob Storage
                future = pool.submit(
                    load_and_crop, img_path, images_dir, container_client,
                    bbox_dicts, confidence_threshold,
                    crop_path_template[is_ground_truth],
                    save_full_images, square_crops, check_crops_valid)
                future_to_img_path[future] = img_path

            total = len(future_to_img_path)
            print(f'Reading/downloading {total} images and cropping...')
            for future in tqdm(futures.as_completed(future_to_img_path),
                               total=total):
                img_path = future_to_img_path[future]
                try:
                    did_download, num_new_crops = future.result()
                    total_downloads += did_download
                    total_new_crops += num_new_crops
                except Exception as e:  # pylint: disable=broad-except
                    # one bad image must not abort the whole batch; record it
                    exception_type = type(e).__name__
                    tqdm.write(f'{img_path} - generated {exception_type}: {e}')
                    images_failed_download.append(img_path)
    finally:
        if container_client is not None:
            container_client.close()

    print(f'Downloaded {total_downloads} images.')
    print(f'Made {total_new_crops} new crops.')
    return images_failed_download, total_downloads, total_new_crops
294
-
295
-
296
def load_local_image(img_path: str | BinaryIO) -> Optional[Image.Image]:
    """
    Opens an image from a local path (or binary stream) and forces its pixel
    data to be read.

    Returns the loaded image, or None if PIL raises an OSError while opening or
    reading it; in that case a message is written via tqdm.write().
    """

    try:
        with Image.open(img_path) as loaded:
            # force the pixel data to be read now, so that a truncated or
            # corrupt file is detected here rather than on first use
            loaded.load()
    except OSError as err:  # PIL.UnidentifiedImageError is a subclass of OSError
        tqdm.write(f'Unable to load {img_path}. {type(err).__name__}: {err}.')
        return None
    return loaded
309
-
310
-
311
def load_and_crop(img_path: str,
                  images_dir: Optional[str],
                  container_client: Optional[ContainerClient],
                  bbox_dicts: Iterable[Mapping[str, Any]],
                  confidence_threshold: float,
                  crop_path_template: str,
                  save_full_image: bool,
                  square_crops: bool,
                  check_crops_valid: bool) -> tuple[bool, int]:
    """
    Given an image and a list of bounding boxes, checks if the crops already
    exist. If not, loads the image locally or from Azure Blob Storage, then
    crops it.

    local image path: <images_dir>/<img_path>
    Azure storage: <img_path> as the blob name inside the container

    An image is only downloaded from Azure Blob Storage if it could not be
    loaded locally and if at least one of its crops still needs to be made.
    Only boxes whose category is 'animal' are cropped; boxes with a 'conf'
    value below the confidence threshold are skipped.

    Args:
        img_path: str, image path
        images_dir: optional str, path to local directory of images, and where
            full images are saved if save_full_image=True
        container_client: optional ContainerClient, this function does not
            use container_client in any context manager
        bbox_dicts: list of dicts, each dict contains info on a bounding box
        confidence_threshold: float, only crop bounding boxes above this value
        crop_path_template: str, contains placeholders {img_path} and {n}
        save_full_image: bool, whether to save downloaded images to images_dir,
            images_dir must be given if save_full_image=True
        square_crops: bool, whether to crop bounding boxes as squares
        check_crops_valid: bool, whether to load each crop to ensure the file is
            valid (i.e., not truncated)

    Returns:
        did_download: bool, whether image was downloaded from Azure Blob Storage
        num_new_crops: int, number of new crops successfully saved

    Raises:
        AssertionError: if the image could be neither loaded nor downloaded
        ValueError: if a download is needed with save_full_image=True but no
            images_dir was given
    """

    did_download = False
    num_new_crops = 0

    # crop_path => normalized bbox coordinates [xmin, ymin, width, height]
    bboxes_tocrop: dict[str, list[float]] = {}
    for i, bbox_dict in enumerate(bbox_dicts):
        # only ground-truth bboxes do not have a "confidence" value
        if 'conf' in bbox_dict and bbox_dict['conf'] < confidence_threshold:
            continue
        if bbox_dict['category'] != 'animal':
            continue
        # the index i counts skipped boxes too, so crop numbers stay stable
        # across runs with different thresholds
        crop_path = crop_path_template.format(img_path=img_path, n=i)
        if not os.path.exists(crop_path) or (
                check_crops_valid and load_local_image(crop_path) is None):
            bboxes_tocrop[crop_path] = bbox_dict['bbox']
    if not bboxes_tocrop:
        return did_download, num_new_crops

    img = None
    full_img_path = None
    # Path reported if the image cannot be obtained. Initialized up front so
    # the error below is well-formed even when neither images_dir nor
    # container_client is available.
    debug_path = img_path

    # try loading image from local directory
    if images_dir is not None:
        full_img_path = os.path.join(images_dir, img_path)
        debug_path = full_img_path
        if os.path.exists(full_img_path):
            img = load_local_image(full_img_path)

    # try to download image from Blob Storage
    if img is None and container_client is not None:
        if save_full_image and full_img_path is None:
            raise ValueError(
                'save_full_image specified but no images_dir provided')
        debug_path = img_path
        with io.BytesIO() as stream:
            container_client.download_blob(img_path).readinto(stream)
            stream.seek(0)

            if save_full_image:
                parent_dir = os.path.dirname(full_img_path)
                if parent_dir:  # os.makedirs('') raises FileNotFoundError
                    os.makedirs(parent_dir, exist_ok=True)
                with open(full_img_path, 'wb') as f:
                    f.write(stream.read())
                stream.seek(0)

            img = load_local_image(stream)
        did_download = True

    # raised explicitly (not via `assert`) so the check survives `python -O`
    if img is None:
        raise AssertionError(
            'image "{}" failed to load or download properly'.format(debug_path))

    if img.mode != 'RGB':
        img = img.convert(mode='RGB')  # always save as RGB for consistency

    # crop the image
    for crop_path, bbox in bboxes_tocrop.items():
        num_new_crops += save_crop(
            img, bbox_norm=bbox, square_crop=square_crops, save=crop_path)
    return did_download, num_new_crops
405
-
406
-
407
def save_crop(img: Image.Image, bbox_norm: Sequence[float], square_crop: bool,
              save: str) -> bool:
    """
    Crops an image and saves the crop to file.

    Args:
        img: PIL.Image.Image object, already loaded
        bbox_norm: list or tuple of float, [xmin, ymin, width, height] all in
            normalized coordinates
        square_crop: bool, whether to crop bounding boxes as a square
        save: str, path to save cropped image; missing parent directories are
            created, and a bare filename is saved relative to the working
            directory

    Returns: bool, True if a crop was saved, False otherwise
    """

    img_w, img_h = img.size
    xmin = int(bbox_norm[0] * img_w)
    ymin = int(bbox_norm[1] * img_h)
    box_w = int(bbox_norm[2] * img_w)
    box_h = int(bbox_norm[3] * img_h)

    if square_crop:
        # expand box width or height to be square, but limit to img size
        box_size = max(box_w, box_h)
        xmin = max(0, min(
            xmin - int((box_size - box_w) / 2),
            img_w - box_w))
        ymin = max(0, min(
            ymin - int((box_size - box_h) / 2),
            img_h - box_h))
        box_w = min(img_w, box_size)
        box_h = min(img_h, box_size)

    if box_w == 0 or box_h == 0:
        tqdm.write(f'Skipping size-0 crop (w={box_w}, h={box_h}) at {save}')
        return False

    # Image.crop() takes box=[left, upper, right, lower]
    crop = img.crop(box=[xmin, ymin, xmin + box_w, ymin + box_h])

    if square_crop and (box_w != box_h):
        # the box was clipped to the image along one axis; pad to square using 0s
        crop = ImageOps.pad(crop, size=(box_size, box_size), color=0)

    # os.path.dirname() is '' for a bare filename, and os.makedirs('') raises
    # FileNotFoundError, so only create the parent when there is one
    parent_dir = os.path.dirname(save)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    crop.save(save)
    return True
454
-
455
-
456
- #%% Command-line driver
457
-
458
def _parse_args(argv: Optional[Sequence[str]] = None) -> argparse.Namespace:
    """
    Builds the command-line parser for this script and parses arguments.

    Args:
        argv: optional list of str, arguments to parse instead of the process
            command line; the default of None makes argparse read sys.argv[1:]

    Returns: argparse.Namespace with attributes detections_json,
        cropped_images_dir, images_dir, container_url, detector_version,
        save_full_images, square_crops, check_crops_valid, threshold, threads,
        and logdir
    """

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description='Crop detections from MegaDetector.')
    parser.add_argument(
        'detections_json',
        help='path to detections JSON file')
    parser.add_argument(
        'cropped_images_dir',
        help='path to local directory for saving crops of bounding boxes')
    parser.add_argument(
        '-i', '--images-dir',
        help='path to directory where full images are already available, '
             'or where images will be written if --save-full-images is set')
    parser.add_argument(
        '-c', '--container-url',
        help='URL (including SAS token, if necessary) of Azure Blob Storage '
             'container to download images from, if images are not all already '
             'locally available in <images_dir>')
    parser.add_argument(
        '-v', '--detector-version',
        help='detector version string, e.g., "4.1", used if detector version '
             'cannot be inferred from detections JSON')
    parser.add_argument(
        '--save-full-images', action='store_true',
        help='forces downloading of full images to --images-dir')
    parser.add_argument(
        '--square-crops', action='store_true',
        help='crop bounding boxes as squares')
    parser.add_argument(
        '--check-crops-valid', action='store_true',
        help='load each crop to ensure file is valid (i.e., not truncated)')
    parser.add_argument(
        '-t', '--threshold', type=float, default=0.0,
        help='confidence threshold above which to crop bounding boxes')
    parser.add_argument(
        '-n', '--threads', type=int, default=1,
        help='number of threads to use for downloading and cropping images')
    parser.add_argument(
        '--logdir', default='.',
        help='path to directory to save log file')
    return parser.parse_args(argv)
501
-
502
-
503
if __name__ == '__main__':

    # Map the parsed command-line options onto main()'s keyword arguments; note
    # that the --threshold option feeds the confidence_threshold parameter.
    args = _parse_args()
    main(
        detections_json_path=args.detections_json,
        cropped_images_dir=args.cropped_images_dir,
        images_dir=args.images_dir,
        container_url=args.container_url,
        detector_version=args.detector_version,
        save_full_images=args.save_full_images,
        square_crops=args.square_crops,
        check_crops_valid=args.check_crops_valid,
        confidence_threshold=args.threshold,
        threads=args.threads,
        logdir=args.logdir,
    )