megadetector 5.0.8__py3-none-any.whl → 5.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (190) hide show
  1. api/__init__.py +0 -0
  2. api/batch_processing/__init__.py +0 -0
  3. api/batch_processing/api_core/__init__.py +0 -0
  4. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. api/batch_processing/api_core/batch_service/score.py +0 -1
  6. api/batch_processing/api_core/server_job_status_table.py +0 -1
  7. api/batch_processing/api_core_support/__init__.py +0 -0
  8. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
  9. api/batch_processing/api_support/__init__.py +0 -0
  10. api/batch_processing/api_support/summarize_daily_activity.py +0 -1
  11. api/batch_processing/data_preparation/__init__.py +0 -0
  12. api/batch_processing/data_preparation/manage_local_batch.py +65 -65
  13. api/batch_processing/data_preparation/manage_video_batch.py +8 -8
  14. api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
  15. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
  16. api/batch_processing/postprocessing/__init__.py +0 -0
  17. api/batch_processing/postprocessing/add_max_conf.py +12 -12
  18. api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
  19. api/batch_processing/postprocessing/combine_api_outputs.py +68 -54
  20. api/batch_processing/postprocessing/compare_batch_results.py +113 -43
  21. api/batch_processing/postprocessing/convert_output_format.py +41 -16
  22. api/batch_processing/postprocessing/load_api_results.py +16 -17
  23. api/batch_processing/postprocessing/md_to_coco.py +31 -21
  24. api/batch_processing/postprocessing/md_to_labelme.py +52 -22
  25. api/batch_processing/postprocessing/merge_detections.py +14 -14
  26. api/batch_processing/postprocessing/postprocess_batch_results.py +246 -174
  27. api/batch_processing/postprocessing/remap_detection_categories.py +32 -25
  28. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +60 -27
  29. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
  30. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
  31. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +242 -158
  32. api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
  33. api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
  34. api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
  35. api/synchronous/__init__.py +0 -0
  36. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  37. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
  38. api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
  39. api/synchronous/api_core/animal_detection_api/config.py +35 -35
  40. api/synchronous/api_core/tests/__init__.py +0 -0
  41. api/synchronous/api_core/tests/load_test.py +109 -109
  42. classification/__init__.py +0 -0
  43. classification/aggregate_classifier_probs.py +21 -24
  44. classification/analyze_failed_images.py +11 -13
  45. classification/cache_batchapi_outputs.py +51 -51
  46. classification/create_classification_dataset.py +69 -68
  47. classification/crop_detections.py +54 -53
  48. classification/csv_to_json.py +97 -100
  49. classification/detect_and_crop.py +105 -105
  50. classification/evaluate_model.py +43 -42
  51. classification/identify_mislabeled_candidates.py +47 -46
  52. classification/json_to_azcopy_list.py +10 -10
  53. classification/json_validator.py +72 -71
  54. classification/map_classification_categories.py +44 -43
  55. classification/merge_classification_detection_output.py +68 -68
  56. classification/prepare_classification_script.py +157 -154
  57. classification/prepare_classification_script_mc.py +228 -228
  58. classification/run_classifier.py +27 -26
  59. classification/save_mislabeled.py +30 -30
  60. classification/train_classifier.py +20 -20
  61. classification/train_classifier_tf.py +21 -22
  62. classification/train_utils.py +10 -10
  63. data_management/__init__.py +0 -0
  64. data_management/annotations/__init__.py +0 -0
  65. data_management/annotations/annotation_constants.py +18 -31
  66. data_management/camtrap_dp_to_coco.py +238 -0
  67. data_management/cct_json_utils.py +102 -59
  68. data_management/cct_to_md.py +176 -158
  69. data_management/cct_to_wi.py +247 -219
  70. data_management/coco_to_labelme.py +272 -263
  71. data_management/coco_to_yolo.py +79 -58
  72. data_management/databases/__init__.py +0 -0
  73. data_management/databases/add_width_and_height_to_db.py +20 -16
  74. data_management/databases/combine_coco_camera_traps_files.py +35 -31
  75. data_management/databases/integrity_check_json_db.py +62 -24
  76. data_management/databases/subset_json_db.py +24 -15
  77. data_management/generate_crops_from_cct.py +27 -45
  78. data_management/get_image_sizes.py +188 -162
  79. data_management/importers/add_nacti_sizes.py +8 -8
  80. data_management/importers/add_timestamps_to_icct.py +78 -78
  81. data_management/importers/animl_results_to_md_results.py +158 -158
  82. data_management/importers/auckland_doc_test_to_json.py +9 -9
  83. data_management/importers/auckland_doc_to_json.py +8 -8
  84. data_management/importers/awc_to_json.py +7 -7
  85. data_management/importers/bellevue_to_json.py +15 -15
  86. data_management/importers/cacophony-thermal-importer.py +13 -13
  87. data_management/importers/carrizo_shrubfree_2018.py +8 -8
  88. data_management/importers/carrizo_trail_cam_2017.py +8 -8
  89. data_management/importers/cct_field_adjustments.py +9 -9
  90. data_management/importers/channel_islands_to_cct.py +10 -10
  91. data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
  92. data_management/importers/ena24_to_json.py +7 -7
  93. data_management/importers/filenames_to_json.py +8 -8
  94. data_management/importers/helena_to_cct.py +7 -7
  95. data_management/importers/idaho-camera-traps.py +7 -7
  96. data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
  97. data_management/importers/jb_csv_to_json.py +9 -9
  98. data_management/importers/mcgill_to_json.py +8 -8
  99. data_management/importers/missouri_to_json.py +18 -18
  100. data_management/importers/nacti_fieldname_adjustments.py +10 -10
  101. data_management/importers/noaa_seals_2019.py +7 -7
  102. data_management/importers/pc_to_json.py +7 -7
  103. data_management/importers/plot_wni_giraffes.py +7 -7
  104. data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
  105. data_management/importers/prepare_zsl_imerit.py +7 -7
  106. data_management/importers/rspb_to_json.py +8 -8
  107. data_management/importers/save_the_elephants_survey_A.py +8 -8
  108. data_management/importers/save_the_elephants_survey_B.py +9 -9
  109. data_management/importers/snapshot_safari_importer.py +26 -26
  110. data_management/importers/snapshot_safari_importer_reprise.py +665 -665
  111. data_management/importers/snapshot_serengeti_lila.py +14 -14
  112. data_management/importers/sulross_get_exif.py +8 -9
  113. data_management/importers/timelapse_csv_set_to_json.py +11 -11
  114. data_management/importers/ubc_to_json.py +13 -13
  115. data_management/importers/umn_to_json.py +7 -7
  116. data_management/importers/wellington_to_json.py +8 -8
  117. data_management/importers/wi_to_json.py +9 -9
  118. data_management/importers/zamba_results_to_md_results.py +181 -181
  119. data_management/labelme_to_coco.py +65 -24
  120. data_management/labelme_to_yolo.py +8 -8
  121. data_management/lila/__init__.py +0 -0
  122. data_management/lila/add_locations_to_island_camera_traps.py +9 -9
  123. data_management/lila/add_locations_to_nacti.py +147 -147
  124. data_management/lila/create_lila_blank_set.py +13 -13
  125. data_management/lila/create_lila_test_set.py +8 -8
  126. data_management/lila/create_links_to_md_results_files.py +106 -106
  127. data_management/lila/download_lila_subset.py +44 -110
  128. data_management/lila/generate_lila_per_image_labels.py +55 -42
  129. data_management/lila/get_lila_annotation_counts.py +18 -15
  130. data_management/lila/get_lila_image_counts.py +11 -11
  131. data_management/lila/lila_common.py +96 -33
  132. data_management/lila/test_lila_metadata_urls.py +132 -116
  133. data_management/ocr_tools.py +173 -128
  134. data_management/read_exif.py +110 -97
  135. data_management/remap_coco_categories.py +83 -83
  136. data_management/remove_exif.py +58 -62
  137. data_management/resize_coco_dataset.py +30 -23
  138. data_management/wi_download_csv_to_coco.py +246 -239
  139. data_management/yolo_output_to_md_output.py +86 -73
  140. data_management/yolo_to_coco.py +300 -60
  141. detection/__init__.py +0 -0
  142. detection/detector_training/__init__.py +0 -0
  143. detection/process_video.py +85 -33
  144. detection/pytorch_detector.py +43 -25
  145. detection/run_detector.py +157 -72
  146. detection/run_detector_batch.py +179 -113
  147. detection/run_inference_with_yolov5_val.py +108 -48
  148. detection/run_tiled_inference.py +111 -40
  149. detection/tf_detector.py +51 -29
  150. detection/video_utils.py +606 -521
  151. docs/source/conf.py +43 -0
  152. md_utils/__init__.py +0 -0
  153. md_utils/azure_utils.py +9 -9
  154. md_utils/ct_utils.py +228 -68
  155. md_utils/directory_listing.py +59 -64
  156. md_utils/md_tests.py +968 -871
  157. md_utils/path_utils.py +460 -134
  158. md_utils/process_utils.py +157 -133
  159. md_utils/sas_blob_utils.py +20 -20
  160. md_utils/split_locations_into_train_val.py +45 -32
  161. md_utils/string_utils.py +33 -10
  162. md_utils/url_utils.py +176 -60
  163. md_utils/write_html_image_list.py +40 -33
  164. md_visualization/__init__.py +0 -0
  165. md_visualization/plot_utils.py +102 -109
  166. md_visualization/render_images_with_thumbnails.py +34 -34
  167. md_visualization/visualization_utils.py +597 -291
  168. md_visualization/visualize_db.py +76 -48
  169. md_visualization/visualize_detector_output.py +61 -42
  170. {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/METADATA +13 -7
  171. megadetector-5.0.9.dist-info/RECORD +224 -0
  172. {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
  173. taxonomy_mapping/__init__.py +0 -0
  174. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
  175. taxonomy_mapping/map_new_lila_datasets.py +154 -154
  176. taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
  177. taxonomy_mapping/preview_lila_taxonomy.py +591 -591
  178. taxonomy_mapping/retrieve_sample_image.py +12 -12
  179. taxonomy_mapping/simple_image_download.py +11 -11
  180. taxonomy_mapping/species_lookup.py +10 -10
  181. taxonomy_mapping/taxonomy_csv_checker.py +18 -18
  182. taxonomy_mapping/taxonomy_graph.py +47 -47
  183. taxonomy_mapping/validate_lila_category_mappings.py +83 -76
  184. data_management/cct_json_to_filename_json.py +0 -89
  185. data_management/cct_to_csv.py +0 -140
  186. data_management/databases/remove_corrupted_images_from_db.py +0 -191
  187. detection/detector_training/copy_checkpoints.py +0 -43
  188. megadetector-5.0.8.dist-info/RECORD +0 -205
  189. {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0
  190. {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/WHEEL +0 -0
@@ -1,10 +1,10 @@
1
- ########
2
- #
3
- # visualization_utils.py
4
- #
5
- # Core rendering functions shared across visualization scripts
6
- #
7
- ########
1
+ """
2
+
3
+ visualization_utils.py
4
+
5
+ Rendering functions shared across visualization scripts
6
+
7
+ """
8
8
 
9
9
  #%% Constants and imports
10
10
 
@@ -12,9 +12,9 @@ import time
12
12
  import numpy as np
13
13
  import requests
14
14
  import os
15
+ import cv2
15
16
 
16
17
  from io import BytesIO
17
- from typing import Union
18
18
  from PIL import Image, ImageFile, ImageFont, ImageDraw
19
19
  from multiprocessing.pool import ThreadPool
20
20
  from multiprocessing.pool import Pool
@@ -47,18 +47,49 @@ DEFAULT_DETECTOR_LABEL_MAP = {
47
47
  str(k): v for k, v in detector_bbox_category_id_to_name.items()
48
48
  }
49
49
 
50
- # Retry on blob storage read failures
50
+ # Constants controlling retry behavior when fetching images from URLs
51
51
  n_retries = 10
52
52
  retry_sleep_time = 0.01
53
+
54
+ # If we try to open an image from a URL, and we encounter any error in this list,
55
+ # we'll retry, otherwise it's just an error.
53
56
  error_names_for_retry = ['ConnectionError']
54
57
 
55
58
  DEFAULT_BOX_THICKNESS = 4
56
59
  DEFAULT_LABEL_FONT_SIZE = 16
57
60
 
61
+ # Default color map for mapping integer category IDs to colors when rendering bounding
62
+ # boxes
63
+ DEFAULT_COLORS = [
64
+ 'AliceBlue', 'Red', 'RoyalBlue', 'Gold', 'Chartreuse', 'Aqua', 'Azure',
65
+ 'Beige', 'Bisque', 'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue',
66
+ 'AntiqueWhite', 'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson',
67
+ 'Cyan', 'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange',
68
+ 'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet',
69
+ 'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite',
70
+ 'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'GoldenRod',
71
+ 'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 'Ivory', 'Khaki',
72
+ 'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 'LightBlue',
73
+ 'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey',
74
+ 'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue',
75
+ 'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime',
76
+ 'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid',
77
+ 'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen',
78
+ 'MediumTurquoise', 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin',
79
+ 'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed',
80
+ 'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed',
81
+ 'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple',
82
+ 'RosyBrown', 'Aquamarine', 'SaddleBrown', 'Green', 'SandyBrown',
83
+ 'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue',
84
+ 'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue', 'GreenYellow',
85
+ 'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', 'Wheat', 'White',
86
+ 'WhiteSmoke', 'Yellow', 'YellowGreen'
87
+ ]
88
+
58
89
 
59
90
  #%% Functions
60
91
 
61
- def open_image(input_file: Union[str, BytesIO], ignore_exif_rotation=False) -> Image:
92
+ def open_image(input_file, ignore_exif_rotation=False):
62
93
  """
63
94
  Opens an image in binary format using PIL.Image and converts to RGB mode.
64
95
 
@@ -69,11 +100,13 @@ def open_image(input_file: Union[str, BytesIO], ignore_exif_rotation=False) -> I
69
100
  errors can show up later. load_image() is the non-lazy version of this function.
70
101
 
71
102
  Args:
72
- input_file: str or BytesIO, either a path to an image file (anything
73
- that PIL can open), or an image as a stream of bytes
103
+ input_file (str or BytesIO): can be a path to an image file (anything
104
+ that PIL can open), a URL, or an image as a stream of bytes
105
+ ignore_exif_rotation (bool, optional): don't rotate the loaded pixels,
106
+ even if we are loading a JPEG and that JPEG says it should be rotated
74
107
 
75
108
  Returns:
76
- A PIL image object in RGB mode
109
+ PIL.Image.Image: A PIL Image object in RGB mode
77
110
  """
78
111
 
79
112
  if (isinstance(input_file, str)
@@ -104,6 +137,8 @@ def open_image(input_file: Union[str, BytesIO], ignore_exif_rotation=False) -> I
104
137
 
105
138
  else:
106
139
  image = Image.open(input_file)
140
+
141
+ # Convert to RGB if necessary
107
142
  if image.mode not in ('RGBA', 'RGB', 'L', 'I;16'):
108
143
  raise AttributeError(
109
144
  f'Image {input_file} uses unsupported mode {image.mode}')
@@ -134,7 +169,7 @@ def open_image(input_file: Union[str, BytesIO], ignore_exif_rotation=False) -> I
134
169
 
135
170
  def exif_preserving_save(pil_image,output_file,quality='keep',default_quality=85,verbose=False):
136
171
  """
137
- Save [pil_image] to [output_file], making a moderate attempt to preserve EXIF
172
+ Saves [pil_image] to [output_file], making a moderate attempt to preserve EXIF
138
173
  data and JPEG quality. Neither is guaranteed.
139
174
 
140
175
  Also see:
@@ -143,12 +178,15 @@ def exif_preserving_save(pil_image,output_file,quality='keep',default_quality=85
143
178
 
144
179
  ...for more ways to preserve jpeg quality if quality='keep' doesn't do the trick.
145
180
 
146
- The "quality" parameter should be "keep" (default), or an integer from 0 to 100.
147
- This is only used if PIL thinks the the source image is a JPEG. If you load a JPEG
148
- and resize it in memory, for example, it's no longer a JPEG.
149
-
150
- 'default_quality' is used when quality == 'keep' and we are saving a non-JPEG source.
151
- 'keep' is only supported for JPEG sources.
181
+ Args:
182
+ pil_image (Image): the PIL Image object to save
183
+ output_file (str): the destination file
184
+ quality (str or int, optional): can be "keep" (default), or an integer from 0 to 100.
185
+ This is only used if PIL thinks the source image is a JPEG. If you load a JPEG
186
+ and resize it in memory, for example, it's no longer a JPEG.
187
+ default_quality (int, optional): determines output quality when quality == 'keep' and we are
188
+ saving a non-JPEG source to a JPEG file
189
+ verbose (bool, optional): enable additional debug console output
152
190
  """
153
191
 
154
192
  # Read EXIF metadata
@@ -185,18 +223,19 @@ def exif_preserving_save(pil_image,output_file,quality='keep',default_quality=85
185
223
  # ...def exif_preserving_save(...)
186
224
 
187
225
 
188
- def load_image(input_file: Union[str, BytesIO], ignore_exif_rotation=False) -> Image:
226
+ def load_image(input_file, ignore_exif_rotation=False):
189
227
  """
190
- Loads the image at input_file as a PIL Image into memory.
191
-
192
- Image.open() used in open_image() is lazy and errors will occur downstream
193
- if not explicitly loaded.
194
-
228
+ Loads an image file. This is the non-lazy version of open_image(); i.e.,
229
+ it forces image decoding before returning.
230
+
195
231
  Args:
196
- input_file: str or BytesIO, either a path to an image file (anything
197
- that PIL can open), or an image as a stream of bytes
232
+ input_file (str or BytesIO): can be a path to an image file (anything
233
+ that PIL can open), a URL, or an image as a stream of bytes
234
+ ignore_exif_rotation (bool, optional): don't rotate the loaded pixels,
235
+ even if we are loading a JPEG and that JPEG says it should be rotated
198
236
 
199
- Returns: PIL.Image.Image, in RGB mode
237
+ Returns:
238
+ PIL.Image.Image: a PIL Image object in RGB mode
200
239
  """
201
240
 
202
241
  image = open_image(input_file, ignore_exif_rotation=ignore_exif_rotation)
@@ -204,23 +243,35 @@ def load_image(input_file: Union[str, BytesIO], ignore_exif_rotation=False) -> I
204
243
  return image
205
244
 
206
245
 
207
- def resize_image(image, target_width, target_height=-1, output_file=None,
246
+ def resize_image(image, target_width=-1, target_height=-1, output_file=None,
208
247
  no_enlarge_width=False, verbose=False, quality='keep'):
209
248
  """
210
- Resizes a PIL image object to the specified width and height; does not resize
249
+ Resizes a PIL Image object to the specified width and height; does not resize
211
250
  in place. If either width or height are -1, resizes with aspect ratio preservation.
212
251
 
213
- None is equivalent to -1 for target_width and target_height.
214
-
215
- [image] can be a PIL image or a filename.
216
-
217
252
  If target_width and target_height are both -1, does not modify the image, but
218
253
  will write to output_file if supplied.
219
254
 
220
- If no_enlarge_width is True, and the target width is larger than the original
221
- image width, does not modify the image, but will write to output_file if supplied.
255
+ If no resizing is required, and an Image object is supplied, returns the original Image
256
+ object (i.e., does not copy).
222
257
 
223
- 'quality' is passed to exif_preserving_save, see docs there.
258
+ Args:
259
+ image (Image or str): PIL Image object or a filename (local file or URL)
260
+ target_width (int, optional): width to which we should resize this image, or -1
261
+ to let target_height determine the size
262
+ target_height (int, optional): height to which we should resize this image, or -1
263
+ to let target_width determine the size
264
+ output_file (str, optional): file to which we should save this image; if None,
265
+ just returns the image without saving
266
+ no_enlarge_width (bool, optional): if [no_enlarge_width] is True, and
267
+ [target width] is larger than the original image width, does not modify the image,
268
+ but will write to output_file if supplied
269
+ verbose (bool, optional): enable additional debug output
270
+ quality (str or int, optional): passed to exif_preserving_save, see docs for more detail
271
+
272
+ Returns:
273
+ PIL.Image.Image: the resized image, which may be the original image if no resizing is
274
+ required
224
275
  """
225
276
 
226
277
  image_fn = 'in_memory'
@@ -295,42 +346,22 @@ def resize_image(image, target_width, target_height=-1, output_file=None,
295
346
  # ...def resize_image(...)
296
347
 
297
348
 
298
- DEFAULT_COLORS = [
299
- 'AliceBlue', 'Red', 'RoyalBlue', 'Gold', 'Chartreuse', 'Aqua', 'Azure',
300
- 'Beige', 'Bisque', 'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue',
301
- 'AntiqueWhite', 'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson',
302
- 'Cyan', 'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange',
303
- 'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet',
304
- 'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite',
305
- 'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'GoldenRod',
306
- 'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 'Ivory', 'Khaki',
307
- 'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 'LightBlue',
308
- 'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey',
309
- 'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue',
310
- 'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime',
311
- 'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid',
312
- 'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen',
313
- 'MediumTurquoise', 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin',
314
- 'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed',
315
- 'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed',
316
- 'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple',
317
- 'RosyBrown', 'Aquamarine', 'SaddleBrown', 'Green', 'SandyBrown',
318
- 'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue',
319
- 'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue', 'GreenYellow',
320
- 'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', 'Wheat', 'White',
321
- 'WhiteSmoke', 'Yellow', 'YellowGreen'
322
- ]
323
-
324
-
325
349
  def crop_image(detections, image, confidence_threshold=0.15, expansion=0):
326
350
  """
327
- Crops detections above *confidence_threshold* from the PIL image *image*,
328
- returning a list of PIL images.
351
+ Crops detections above [confidence_threshold] from the PIL image [image],
352
+ returning a list of PIL Images.
329
353
 
330
- *detections* should be a list of dictionaries with keys 'conf' and 'bbox';
331
- see bbox format description below. Normalized, [x,y,w,h], upper-left-origin.
332
-
333
- *expansion* specifies a number of pixels to include on each side of the box.
354
+ Args:
355
+ detections (list): a list of dictionaries with keys 'conf' and 'bbox';
356
+ boxes are length-four arrays formatted as [x,y,w,h], normalized,
357
+ upper-left origin (this is the standard MD detection format)
358
+ image (Image): the PIL Image object from which we should crop detections
359
+ confidence_threshold (float, optional): only crop detections above this threshold
360
+ expansion (int, optional): a number of pixels to include on each side of a cropped
361
+ detection
362
+
363
+ Returns:
364
+ list: a possibly-empty list of PIL Image objects
334
365
  """
335
366
 
336
367
  ret_images = []
@@ -372,91 +403,112 @@ def crop_image(detections, image, confidence_threshold=0.15, expansion=0):
372
403
  return ret_images
373
404
 
374
405
 
375
- def render_detection_bounding_boxes(detections, image,
376
- label_map={},
406
+ def render_detection_bounding_boxes(detections,
407
+ image,
408
+ label_map='show_categories',
377
409
  classification_label_map=None,
378
- confidence_threshold=0.15, thickness=DEFAULT_BOX_THICKNESS, expansion=0,
410
+ confidence_threshold=0.15,
411
+ thickness=DEFAULT_BOX_THICKNESS,
412
+ expansion=0,
379
413
  classification_confidence_threshold=0.3,
380
414
  max_classifications=3,
381
- colormap=DEFAULT_COLORS,
415
+ colormap=None,
382
416
  textalign=TEXTALIGN_LEFT,
383
417
  label_font_size=DEFAULT_LABEL_FONT_SIZE,
384
418
  custom_strings=None):
385
419
  """
386
- Renders bounding boxes, label, and confidence on an image if confidence is above the threshold.
387
-
388
- Boxes are in the format that's output from the batch processing API.
389
-
420
+ Renders bounding boxes (with labels and confidence values) on an image for all
421
+ detections above a threshold.
422
+
390
423
  Renders classification labels if present.
424
+
425
+ [image] is modified in place.
391
426
 
392
427
  Args:
393
428
 
394
- detections: detections on the image, example content:
395
- [
396
- {
397
- "category": "2",
398
- "conf": 0.996,
399
- "bbox": [
400
- 0.0,
401
- 0.2762,
402
- 0.1234,
403
- 0.2458
404
- ]
405
- }
406
- ]
407
-
408
- ...where the bbox coordinates are [x, y, box_width, box_height].
409
-
410
- (0, 0) is the upper-left. Coordinates are normalized.
411
-
412
- Supports classification results, if *detections* has the format
413
- [
414
- {
415
- "category": "2",
416
- "conf": 0.996,
417
- "bbox": [
418
- 0.0,
419
- 0.2762,
420
- 0.1234,
421
- 0.2458
422
- ]
423
- "classifications": [
424
- ["3", 0.901],
425
- ["1", 0.071],
426
- ["4", 0.025]
427
- ]
428
- }
429
- ]
429
+ detections (list): list of detections in the MD output format, for example:
430
+
431
+ .. code-block:: none
432
+
433
+ [
434
+ {
435
+ "category": "2",
436
+ "conf": 0.996,
437
+ "bbox": [
438
+ 0.0,
439
+ 0.2762,
440
+ 0.1234,
441
+ 0.2458
442
+ ]
443
+ }
444
+ ]
445
+
446
+ ...where the bbox coordinates are [x, y, box_width, box_height].
447
+
448
+ (0, 0) is the upper-left. Coordinates are normalized.
449
+
450
+ Supports classification results, in the standard format:
451
+
452
+ .. code-block:: none
453
+
454
+ [
455
+ {
456
+ "category": "2",
457
+ "conf": 0.996,
458
+ "bbox": [
459
+ 0.0,
460
+ 0.2762,
461
+ 0.1234,
462
+ 0.2458
463
+ ]
464
+ "classifications": [
465
+ ["3", 0.901],
466
+ ["1", 0.071],
467
+ ["4", 0.025]
468
+ ]
469
+ }
470
+ ]
430
471
 
431
- image: PIL.Image object
472
+ image (PIL.Image.Image): image on which we should render detections
432
473
 
433
- label_map: optional, mapping the numerical label to a string name. The type of the numerical label
434
- (default string) needs to be consistent with the keys in label_map; no casting is carried out.
435
- If this is None, no labels are shown (not even numbers and confidence values). If you want
436
- category numbers and confidence values without class labels, use {}.
474
+ label_map (dict, optional): optional, mapping the numeric label to a string name. The type of the
475
+ numeric label (typically strings) needs to be consistent with the keys in label_map; no casting is
476
+ carried out. If [label_map] is None, no labels are shown (not even numbers and confidence values).
477
+ If you want category numbers and confidence values without class labels, use the default value,
478
+ the string 'show_categories'.
437
479
 
438
- classification_label_map: optional, mapping of the string class labels to the actual class names.
439
- The type of the numerical label (default string) needs to be consistent with the keys in
440
- label_map; no casting is carried out. If this is None, no classification labels are shown.
480
+ classification_label_map (dict, optional): optional, mapping of the string class labels to the actual
481
+ class names. The type of the numeric label (typically strings) needs to be consistent with the keys
482
+ in label_map; no casting is carried out. If [classification_label_map] is None, no classification
483
+ labels are shown.
441
484
 
442
- confidence_threshold: optional, threshold above which boxes are rendered. Can also be a dictionary
443
- mapping category IDs to thresholds.
485
+ confidence_threshold (float or dict, optional): threshold above which boxes are rendered. Can also be a
486
+ dictionary mapping category IDs to thresholds.
487
+
488
+ thickness (int, optional): line thickness in pixels
444
489
 
445
- thickness: line thickness in pixels. Default value is 4.
490
+ expansion (int, optional): number of pixels to expand bounding boxes on each side
446
491
 
447
- expansion: number of pixels to expand bounding boxes on each side. Default is 0.
492
+ classification_confidence_threshold (float, optional): confidence above which classification results
493
+ are displayed
448
494
 
449
- classification_confidence_threshold: confidence above which classification result is retained.
495
+ max_classifications (int, optional): maximum number of classification results rendered for one image
450
496
 
451
- max_classifications: maximum number of classification results retained for one image.
497
+ colormap (list, optional): list of color names, used to choose colors for categories by
498
+ indexing with the values in [classes]; defaults to a reasonable set of colors
499
+
500
+ textalign (int, optional): TEXTALIGN_LEFT or TEXTALIGN_RIGHT
501
+
502
+ label_font_size (float, optional): font size for labels
452
503
 
453
504
  custom_strings: optional set of strings to append to detection labels, should have the
454
- same length as [detections]. Appended before classification labels, if classification
455
- data is provided.
456
-
457
- image is modified in place.
505
+ same length as [detections]. Appended before any classification labels.
458
506
  """
459
507
 
508
+ # Input validation
509
+ if (label_map is not None) and (isinstance(label_map,str)) and (label_map == 'show_categories'):
510
+ label_map = {}
511
+
460
512
  if custom_strings is not None:
461
513
  assert len(custom_strings) == len(detections), \
462
514
  '{} custom strings provided for {} detections'.format(
@@ -477,8 +529,7 @@ def render_detection_bounding_boxes(detections, image,
477
529
  if isinstance(confidence_threshold,dict):
478
530
  rendering_threshold = confidence_threshold[detection['category']]
479
531
  else:
480
- rendering_threshold = confidence_threshold
481
-
532
+ rendering_threshold = confidence_threshold
482
533
 
483
534
  # Always render objects with a confidence of "None", this is typically used
484
535
  # for ground truth data.
@@ -489,7 +540,7 @@ def render_detection_bounding_boxes(detections, image,
489
540
  clss = detection['category']
490
541
 
491
542
  # {} is the default, which means "show labels with no mapping", so don't use "if label_map" here
492
- # if label_map:
543
+ # if label_map:
493
544
  if label_map is not None:
494
545
  label = label_map[clss] if clss in label_map else clss
495
546
  if score is not None:
@@ -560,25 +611,30 @@ def draw_bounding_boxes_on_image(image,
560
611
  thickness=DEFAULT_BOX_THICKNESS,
561
612
  expansion=0,
562
613
  display_strs=None,
563
- colormap=DEFAULT_COLORS,
614
+ colormap=None,
564
615
  textalign=TEXTALIGN_LEFT,
565
616
  label_font_size=DEFAULT_LABEL_FONT_SIZE):
566
617
  """
567
- Draws bounding boxes on an image.
618
+ Draws bounding boxes on an image. Modifies the image in place.
568
619
 
569
620
  Args:
570
- image: a PIL.Image object.
571
- boxes: a 2 dimensional numpy array of [N, 4]: (ymin, xmin, ymax, xmax).
572
- The coordinates are in normalized format between [0, 1].
573
- classes: a list of ints or strings (that can be cast to ints) corresponding to the
574
- class labels of the boxes. This is only used for color selection.
575
- thickness: line thickness in pixels. Default value is 4.
576
- expansion: number of pixels to expand bounding boxes on each side. Default is 0.
577
- display_strs: list of list of strings.
578
- a list of strings for each bounding box.
579
- The reason to pass a list of strings for a
580
- bounding box is that it might contain
581
- multiple labels.
621
+
622
+ image (PIL.Image): the image on which we should draw boxes
623
+ boxes (np.array): a two-dimensional numpy array of size [N, 4], where N is the
624
+ number of boxes, and each row is (ymin, xmin, ymax, xmax). Coordinates should be
625
+ normalized to image height/width.
626
+ classes (list): a list of ints or string-formatted ints corresponding to the
627
+ class labels of the boxes. This is only used for color selection. Should have the same
628
+ length as [boxes].
629
+ thickness (int, optional): line thickness in pixels
630
+ expansion (int, optional): number of pixels to expand bounding boxes on each side
631
+ display_strs (list, optional): list of list of strings (the outer list should have the
632
+ same length as [boxes]). Typically this is used to show (possibly multiple) detection
633
+ or classification categories and/or confidence values.
634
+ colormap (list, optional): list of color names, used to choose colors for categories by
635
+ indexing with the values in [classes]; defaults to a reasonable set of colors
636
+ textalign (int, optional): TEXTALIGN_LEFT or TEXTALIGN_RIGHT
637
+ label_font_size (float, optional): font size for labels
582
638
  """
583
639
 
584
640
  boxes_shape = boxes.shape
@@ -610,13 +666,13 @@ def draw_bounding_box_on_image(image,
610
666
  clss=None,
611
667
  thickness=DEFAULT_BOX_THICKNESS,
612
668
  expansion=0,
613
- display_str_list=(),
669
+ display_str_list=None,
614
670
  use_normalized_coordinates=True,
615
671
  label_font_size=DEFAULT_LABEL_FONT_SIZE,
616
- colormap=DEFAULT_COLORS,
672
+ colormap=None,
617
673
  textalign=TEXTALIGN_LEFT):
618
674
  """
619
- Adds a bounding box to an image.
675
+ Adds a bounding box to an image. Modifies the image in place.
620
676
 
621
677
  Bounding box coordinates can be specified in either absolute (pixel) or
622
678
  normalized coordinates by setting the use_normalized_coordinates argument.
@@ -626,28 +682,36 @@ def draw_bounding_box_on_image(image,
626
682
  If the top of the bounding box extends to the edge of the image, the strings
627
683
  are displayed below the bounding box.
628
684
 
629
- Args:
630
- image: a PIL.Image object.
631
- ymin: ymin of bounding box - upper left.
632
- xmin: xmin of bounding box.
633
- ymax: ymax of bounding box.
634
- xmax: xmax of bounding box.
635
- clss: str, the class of the object in this bounding box; should be either an integer
636
- or a string-formatted integer.
637
- thickness: line thickness. Default value is 4.
638
- expansion: number of pixels to expand bounding boxes on each side. Default is 0.
639
- display_str_list: list of strings to display in box
640
- (each to be shown on its own line).
641
- use_normalized_coordinates: If True (default), treat coordinates
642
- ymin, xmin, ymax, xmax as relative to the image. Otherwise treat
643
- coordinates as absolute.
644
- label_font_size: font size
645
-
646
685
  Adapted from:
647
686
 
648
687
  https://github.com/tensorflow/models/blob/master/research/object_detection/utils/visualization_utils.py
688
+
689
+ Args:
690
+ image (PIL.Image.Image): the image on which we should draw a box
691
+ ymin (float): ymin of bounding box
692
+ xmin (float): xmin of bounding box
693
+ ymax (float): ymax of bounding box
694
+ xmax (float): xmax of bounding box
695
+ clss (int, optional): the class index of the object in this bounding box, used for choosing
696
+ a color; should be either an integer or a string-formatted integer
697
+ thickness (int, optional): line thickness in pixels
698
+ expansion (int, optional): number of pixels to expand bounding boxes on each side
699
+ display_str_list (list, optional): list of strings to display above the box (each to be shown on its
700
+ own line)
701
+ use_normalized_coordinates (bool, optional): if True (default), treat coordinates
702
+ ymin, xmin, ymax, xmax as relative to the image, otherwise coordinates as absolute pixel values
703
+ label_font_size (float, optional): font size
704
+ colormap (list, optional): list of color names, used to choose colors for categories by
705
+ indexing with the values in [classes]; defaults to a reasonable set of colors
706
+ textalign (int, optional): TEXTALIGN_LEFT or TEXTALIGN_RIGHT
649
707
  """
650
708
 
709
+ if colormap is None:
710
+ colormap = DEFAULT_COLORS
711
+
712
+ if display_str_list is None:
713
+ display_str_list = []
714
+
651
715
  if clss is None:
652
716
  # Default to the MegaDetector animal class ID (1)
653
717
  color = colormap[1]
@@ -758,61 +822,29 @@ def draw_bounding_box_on_image(image,
758
822
  # ...def draw_bounding_box_on_image(...)
759
823
 
760
824
 
761
- def render_iMerit_boxes(boxes, classes, image,
762
- label_map=annotation_constants.annotation_bbox_category_id_to_name):
763
- """
764
- Renders bounding boxes and their category labels on a PIL image.
765
-
766
- Args:
767
- boxes: bounding box annotations from iMerit, format is:
768
- [x_rel, y_rel, w_rel, h_rel] (rel = relative coords)
769
- classes: the class IDs of the predicted class of each box/object
770
- image: PIL.Image object to annotate on
771
- label_map: optional dict mapping classes to a string for display
772
-
773
- Returns:
774
- image will be altered in place
775
- """
776
-
777
- display_boxes = []
778
-
779
- # list of lists, one list of strings for each bounding box (to accommodate multiple labels)
780
- display_strs = []
781
-
782
- for box, clss in zip(boxes, classes):
783
- if len(box) == 0:
784
- assert clss == 5
785
- continue
786
- x_rel, y_rel, w_rel, h_rel = box
787
- ymin, xmin = y_rel, x_rel
788
- ymax = ymin + h_rel
789
- xmax = xmin + w_rel
790
-
791
- display_boxes.append([ymin, xmin, ymax, xmax])
792
-
793
- if label_map:
794
- clss = label_map[int(clss)]
795
- display_strs.append([clss])
796
-
797
- display_boxes = np.array(display_boxes)
798
- draw_bounding_boxes_on_image(image, display_boxes, classes, display_strs=display_strs)
799
-
800
-
801
825
  def render_megadb_bounding_boxes(boxes_info, image):
802
826
  """
827
+ Render bounding boxes to an image, where those boxes are in the mostly-deprecated
828
+ MegaDB format, which looks like:
829
+
830
+ .. code-block:: none
831
+
832
+ {
833
+ "category": "animal",
834
+ "bbox": [
835
+ 0.739,
836
+ 0.448,
837
+ 0.187,
838
+ 0.198
839
+ ]
840
+ }
841
+
803
842
  Args:
804
- boxes_info: list of dict, each dict represents a single detection
805
- {
806
- "category": "animal",
807
- "bbox": [
808
- 0.739,
809
- 0.448,
810
- 0.187,
811
- 0.198
812
- ]
813
- }
843
+ boxes_info (list): list of dicts, each dict represents a single detection
814
844
  where bbox coordinates are normalized [x_min, y_min, width, height]
815
- image: PIL.Image.Image, opened image
845
+ image (PIL.Image.Image): image to modify
846
+
847
+ :meta private:
816
848
  """
817
849
 
818
850
  display_boxes = []
@@ -833,15 +865,34 @@ def render_megadb_bounding_boxes(boxes_info, image):
833
865
  # ...def render_megadb_bounding_boxes(...)
834
866
 
835
867
 
836
- def render_db_bounding_boxes(boxes, classes, image, original_size=None,
837
- label_map=None, thickness=DEFAULT_BOX_THICKNESS, expansion=0):
868
+ def render_db_bounding_boxes(boxes,
869
+ classes,
870
+ image,
871
+ original_size=None,
872
+ label_map=None,
873
+ thickness=DEFAULT_BOX_THICKNESS,
874
+ expansion=0):
838
875
  """
839
- Render bounding boxes (with class labels) on [image]. This is a wrapper for
876
+ Render bounding boxes (with class labels) on an image. This is a wrapper for
840
877
  draw_bounding_boxes_on_image, allowing the caller to operate on a resized image
841
- by providing the original size of the image; bboxes will be scaled accordingly.
878
+ by providing the original size of the image; boxes will be scaled accordingly.
842
879
 
843
- This function assumes that bounding boxes are in the COCO camera traps format,
844
- with absolute coordinates.
880
+ This function assumes that bounding boxes are in absolute coordinates, typically
881
+ because they come from COCO camera traps .json files.
882
+
883
+ Args:
884
+ boxes (list): list of length-4 tuples, formatted as (x,y,w,h) (in pixels)
885
+ classes (list): list of ints (or string-formatted ints), used to choose labels (either
886
+ by literally rendering the class labels, or by indexing into [label_map])
887
+ image (PIL.Image.Image): image object to modify
888
+ original_size (tuple, optional): if this is not None, and the size is different than
889
+ the size of [image], we assume that [boxes] refer to the original size, and we scale
890
+ them accordingly before rendering
891
+ label_map (dict, optional): int --> str dictionary, typically mapping category IDs to
892
+ species labels; if None, category labels are rendered verbatim (typically as numbers)
893
+ thickness (int, optional): line width
894
+ expansion (int, optional): a number of pixels to include on each side of a cropped
895
+ detection
845
896
  """
846
897
 
847
898
  display_boxes = []
@@ -873,43 +924,56 @@ def render_db_bounding_boxes(boxes, classes, image, original_size=None,
873
924
  display_strs.append([str(clss)])
874
925
 
875
926
  display_boxes = np.array(display_boxes)
876
- draw_bounding_boxes_on_image(image, display_boxes, classes, display_strs=display_strs,
877
- thickness=thickness, expansion=expansion)
927
+
928
+ draw_bounding_boxes_on_image(image,
929
+ display_boxes,
930
+ classes,
931
+ display_strs=display_strs,
932
+ thickness=thickness,
933
+ expansion=expansion)
878
934
 
879
935
  # ...def render_db_bounding_boxes(...)
880
936
 
881
937
 
882
- def draw_bounding_boxes_on_file(input_file, output_file, detections, confidence_threshold=0.0,
938
+ def draw_bounding_boxes_on_file(input_file,
939
+ output_file,
940
+ detections,
941
+ confidence_threshold=0.0,
883
942
  detector_label_map=DEFAULT_DETECTOR_LABEL_MAP,
884
- thickness=DEFAULT_BOX_THICKNESS, expansion=0,
885
- colormap=DEFAULT_COLORS,
943
+ thickness=DEFAULT_BOX_THICKNESS,
944
+ expansion=0,
945
+ colormap=None,
886
946
  label_font_size=DEFAULT_LABEL_FONT_SIZE,
887
- custom_strings=None,target_size=None,
947
+ custom_strings=None,
948
+ target_size=None,
888
949
  ignore_exif_rotation=False):
889
950
  """
890
- Render detection bounding boxes on an image loaded from file, writing the results to a
891
- new image file.
892
-
893
- "detections" is in the API results format:
894
-
895
- [{"category": "2","conf": 0.996,"bbox": [0.0,0.2762,0.1234,0.2458]}]
896
-
897
- ...where the bbox is:
898
-
899
- [x_min, y_min, width_of_box, height_of_box]
900
-
901
- Normalized, with the origin at the upper-left.
902
-
903
- detector_label_map is a dict mapping category IDs to strings. If this is None,
904
- no confidence values or identifiers are shown If this is {}, just category indices and
905
- confidence values are shown.
906
-
907
- custom_strings: optional set of strings to append to detection labels, should have the
908
- same length as [detections]. Appended before classification labels, if classification
909
- data is provided.
951
+ Renders detection bounding boxes on an image loaded from file, optionally writing the results to
952
+ a new image file.
910
953
 
911
- target_size: tuple of (target_width,target_height). Either or both can be -1,
912
- see resize_image for documentation. If None or (-1,-1), uses the original image size.
954
+ Args:
955
+ input_file (str): filename or URL to load
956
+ output_file (str, optional): filename to which we should write the rendered image
957
+ detections (list): a list of dictionaries with keys 'conf' and 'bbox';
958
+ boxes are length-four arrays formatted as [x,y,w,h], normalized,
959
+ upper-left origin (this is the standard MD detection format)
960
+ detector_label_map (dict, optional): a dict mapping category IDs to strings. If this
961
+ is None, no confidence values or identifiers are shown. If this is {}, just category
962
+ indices and confidence values are shown.
963
+ thickness (int, optional): line width in pixels for box rendering
964
+ expansion (int, optional): box expansion in pixels
965
+ colormap (list, optional): list of color names, used to choose colors for categories by
966
+ indexing with the values in [classes]; defaults to a reasonable set of colors
967
+ label_font_size (float, optional): label font size
968
+ custom_strings (list, optional): set of strings to append to detection labels, should have the
969
+ same length as [detections]. Appended before any classification labels.
970
+ target_size (tuple, optional): tuple of (target_width,target_height). Either or both can be -1,
971
+ see resize_image() for documentation. If None or (-1,-1), uses the original image size.
972
+ ignore_exif_rotation (bool, optional): don't rotate the loaded pixels,
973
+ even if we are loading a JPEG and that JPEG says it should be rotated.
974
+
975
+ Returns:
976
+ PIL.Image.Image: loaded and modified image
913
977
  """
914
978
 
915
979
  image = open_image(input_file, ignore_exif_rotation=ignore_exif_rotation)
@@ -923,19 +987,40 @@ def draw_bounding_boxes_on_file(input_file, output_file, detections, confidence_
923
987
  thickness=thickness,expansion=expansion,colormap=colormap,
924
988
  custom_strings=custom_strings,label_font_size=label_font_size)
925
989
 
926
- image.save(output_file)
990
+ if output_file is not None:
991
+ image.save(output_file)
992
+
993
+ return image
927
994
 
928
995
 
929
- def draw_db_boxes_on_file(input_file, output_file, boxes, classes=None,
930
- label_map=None, thickness=DEFAULT_BOX_THICKNESS, expansion=0,
996
+ def draw_db_boxes_on_file(input_file,
997
+ output_file,
998
+ boxes,
999
+ classes=None,
1000
+ label_map=None,
1001
+ thickness=DEFAULT_BOX_THICKNESS,
1002
+ expansion=0,
931
1003
  ignore_exif_rotation=False):
932
1004
  """
933
- Render COCO bounding boxes (in absolute coordinates) on an image loaded from file, writing the
934
- results to a new image file.
1005
+ Render COCO-formatted bounding boxes (in absolute coordinates) on an image loaded from file,
1006
+ writing the results to a new image file.
935
1007
 
936
- classes is a list of integer category IDs.
1008
+ Args:
1009
+ input_file (str): image file to read
1010
+ output_file (str): image file to write
1011
+ boxes (list): list of length-4 tuples, formatted as (x,y,w,h) (in pixels)
1012
+ classes (list, optional): list of ints (or string-formatted ints), used to choose
1013
+ labels (either by literally rendering the class labels, or by indexing into [label_map])
1014
+ label_map (dict, optional): int --> str dictionary, typically mapping category IDs to
1015
+ species labels; if None, category labels are rendered verbatim (typically as numbers)
1016
+ thickness (int, optional): line width
1017
+ expansion (int, optional): a number of pixels to include on each side of a cropped
1018
+ detection
1019
+ ignore_exif_rotation (bool, optional): don't rotate the loaded pixels,
1020
+ even if we are loading a JPEG and that JPEG says it should be rotated
937
1021
 
938
- detector_label_map is a dict mapping category IDs to strings.
1022
+ Returns:
1023
+ PIL.Image.Image: the loaded and modified image
939
1024
  """
940
1025
 
941
1026
  image = open_image(input_file, ignore_exif_rotation=ignore_exif_rotation)
@@ -945,25 +1030,29 @@ def draw_db_boxes_on_file(input_file, output_file, boxes, classes=None,
945
1030
 
946
1031
  render_db_bounding_boxes(boxes, classes, image, original_size=None,
947
1032
  label_map=label_map, thickness=thickness, expansion=expansion)
948
-
1033
+
949
1034
  image.save(output_file)
950
1035
 
951
-
1036
+ return image
1037
+
952
1038
  # ...def draw_db_boxes_on_file(...)
953
1039
 
954
1040
 
955
1041
  def gray_scale_fraction(image,crop_size=(0.1,0.1)):
956
1042
  """
957
- Returns the fraction of the pixels in [image] that appear to be grayscale (R==G==B),
1043
+ Computes the fraction of the pixels in [image] that appear to be grayscale (R==G==B),
958
1044
  useful for approximating whether this is a night-time image when flash information is not
959
1045
  available in EXIF data (or for video frames, where this information is often not available
960
1046
  in structured metadata at all).
961
1047
 
962
- [image] can be a PIL image or a file name.
963
-
964
- crop_size should be a 2-element list/tuple, representing the fraction of the image
965
- to crop at the top and bottom, respectively, before analyzing (to minimize the possibility
966
- of including color elements in the image chrome).
1048
+ Args:
1049
+ image (str or PIL.Image.Image): Image, filename, or URL to analyze
1050
+ crop_size (optional): a 2-element list/tuple, representing the fraction of the
1051
+ image to crop at the top and bottom, respectively, before analyzing (to minimize
1052
+ the possibility of including color elements in the image overlay)
1053
+
1054
+ Returns:
1055
+ float: the fraction of pixels in [image] that appear to be grayscale (R==G==B)
967
1056
  """
968
1057
 
969
1058
  if isinstance(image,str):
@@ -1059,7 +1148,7 @@ def _resize_absolute_image(input_output_files,
1059
1148
  target_width,target_height,no_enlarge_width,verbose,quality):
1060
1149
 
1061
1150
  """
1062
- Internal wrappter for resize_image used in the context of a batch resize operation.
1151
+ Internal wrapper for resize_image used in the context of a batch resize operation.
1063
1152
  """
1064
1153
 
1065
1154
  input_fn_abs = input_output_files[0]
@@ -1085,22 +1174,43 @@ def _resize_absolute_image(input_output_files,
1085
1174
 
1086
1175
 
1087
1176
  def resize_images(input_file_to_output_file,
1088
- target_width=-1, target_height=-1,
1089
- no_enlarge_width=False, verbose=False, quality='keep',
1090
- pool_type='process', n_workers=10):
1177
+ target_width=-1,
1178
+ target_height=-1,
1179
+ no_enlarge_width=False,
1180
+ verbose=False,
1181
+ quality='keep',
1182
+ pool_type='process',
1183
+ n_workers=10):
1091
1184
  """
1092
- Resize all images the dictionary [input_file_to_output_file].
1093
-
1094
- Defaults to parallelizing across processes.
1095
-
1096
- See resize_image() for parameter information.
1097
-
1185
+ Resizes all images in the dictionary [input_file_to_output_file].
1186
+
1098
1187
  TODO: This is a little more redundant with resize_image_folder than I would like;
1099
1188
  refactor resize_image_folder to call resize_images. Not doing that yet because
1100
1189
  at the time I'm writing this comment, a lot of code depends on resize_image_folder
1101
1190
  and I don't want to rock the boat yet.
1102
- """
1191
+
1192
+ Args:
1193
+ input_file_to_output_file (dict): dict mapping images that exist to the locations
1194
+ where the resized versions should be written
1195
+ target_width (int, optional): width to which we should resize this image, or -1
1196
+ to let target_height determine the size
1197
+ target_height (int, optional): height to which we should resize this image, or -1
1198
+ to let target_width determine the size
1199
+ no_enlarge_width (bool, optional): if [no_enlarge_width] is True, and
1200
+ [target width] is larger than the original image width, does not modify the image,
1201
+ but will write to output_file if supplied
1202
+ verbose (bool, optional): enable additional debug output
1203
+ quality (str or int, optional): passed to exif_preserving_save, see docs for more detail
1204
+ pool_type (str, optional): whether to use processes ('process') or threads ('thread') for
1205
+ parallelization; ignored if n_workers <= 1
1206
+ n_workers (int, optional): number of workers to use for parallel resizing; set to <=1
1207
+ to disable parallelization
1103
1208
 
1209
+ Returns:
1210
+ list: a list of dicts with keys 'input_fn', 'output_fn', 'status', and 'error'.
1211
+ 'status' will be 'success' or 'error'; 'error' will be None for successful cases,
1212
+ otherwise will contain the image-specific error.
1213
+ """
1104
1214
 
1105
1215
  assert pool_type in ('process','thread'), 'Illegal pool type {}'.format(pool_type)
1106
1216
 
@@ -1146,19 +1256,48 @@ def resize_images(input_file_to_output_file,
1146
1256
  # ...def resize_images(...)
1147
1257
 
1148
1258
 
1149
- def resize_image_folder(input_folder, output_folder=None,
1150
- target_width=-1, target_height=-1,
1151
- no_enlarge_width=False, verbose=False, quality='keep',
1152
- pool_type='process', n_workers=10, recursive=True,
1259
+ def resize_image_folder(input_folder,
1260
+ output_folder=None,
1261
+ target_width=-1,
1262
+ target_height=-1,
1263
+ no_enlarge_width=False,
1264
+ verbose=False,
1265
+ quality='keep',
1266
+ pool_type='process',
1267
+ n_workers=10,
1268
+ recursive=True,
1153
1269
  image_files_relative=None):
1154
1270
  """
1155
- Resize all images in a folder (defaults to recursive)
1271
+ Resize all images in a folder (defaults to recursive).
1156
1272
 
1157
1273
  Defaults to in-place resizing (output_folder is optional).
1158
1274
 
1159
- Defaults to parallelizing across processes.
1160
-
1161
- See resize_image() for parameter information.
1275
+ Args:
1276
+ input_folder (str): folder in which we should find images to resize
1277
+ output_folder (str, optional): folder in which we should write resized images. If
1278
+ None, resizes images in place. Otherwise, maintains relative paths in the target
1279
+ folder.
1280
+ target_width (int, optional): width to which we should resize this image, or -1
1281
+ to let target_height determine the size
1282
+ target_height (int, optional): height to which we should resize this image, or -1
1283
+ to let target_width determine the size
1284
+ no_enlarge_width (bool, optional): if [no_enlarge_width] is True, and
1285
+ [target width] is larger than the original image width, does not modify the image,
1286
+ but will write to output_file if supplied
1287
+ verbose (bool, optional): enable additional debug output
1288
+ quality (str or int, optional): passed to exif_preserving_save, see docs for more detail
1289
+ pool_type (str, optional): whether use use processes ('process') or threads ('thread') for
1290
+ parallelization; ignored if n_workers <= 1
1291
+ n_workers (int, optional): number of workers to use for parallel resizing; set to <=1
1292
+ to disable parallelization
1293
+ recursive (bool, optional): whether to search [input_folder] recursively for images.
1294
+ image_files_relative (list, optional): if not None, skips any relative paths not
1295
+ in this list.
1296
+
1297
+ Returns:
1298
+ list: a list of dicts with keys 'input_fn', 'output_fn', 'status', and 'error'.
1299
+ 'status' will be 'success' or 'error'; 'error' will be None for successful cases,
1300
+ otherwise will contain the image-specific error.
1162
1301
  """
1163
1302
 
1164
1303
  assert os.path.isdir(input_folder), '{} is not a folder'.format(input_folder)
@@ -1171,12 +1310,20 @@ def resize_image_folder(input_folder, output_folder=None,
1171
1310
  assert pool_type in ('process','thread'), 'Illegal pool type {}'.format(pool_type)
1172
1311
 
1173
1312
  if image_files_relative is None:
1174
- image_files_relative = find_images(input_folder,recursive=recursive,return_relative_paths=True)
1313
+
1314
+ if verbose:
1315
+ print('Enumerating images')
1316
+
1317
+ image_files_relative = find_images(input_folder,recursive=recursive,
1318
+ return_relative_paths=True,convert_slashes=True)
1175
1319
  if verbose:
1176
1320
  print('Found {} images'.format(len(image_files_relative)))
1177
1321
 
1178
1322
  if n_workers == 1:
1179
1323
 
1324
+ if verbose:
1325
+ print('Resizing images')
1326
+
1180
1327
  results = []
1181
1328
  for fn_relative in tqdm(image_files_relative):
1182
1329
  results.append(_resize_relative_image(fn_relative,
@@ -1215,6 +1362,148 @@ def resize_image_folder(input_folder, output_folder=None,
1215
1362
  # ...def resize_image_folder(...)
1216
1363
 
1217
1364
 
1365
+ #%% Image integrity checking functions
1366
+
1367
+ def check_image_integrity(filename,modes=None):
1368
+ """
1369
+ Check whether we can successfully load an image via OpenCV and/or PIL.
1370
+
1371
+ Args:
1372
+ filename (str): the filename to evaluate
1373
+ modes (list, optional): a list containing one or more of:
1374
+
1375
+ - 'cv'
1376
+ - 'pil'
1377
+ - 'skimage'
1378
+ - 'jpeg_trailer'
1379
+
1380
+ 'jpeg_trailer' checks that the binary data ends with ffd9. It does not check whether
1381
+ the image is actually a jpeg, and even if it is, there are lots of reasons the image might not
1382
+ end with ffd9. It's also true that the JPEGs that cause "premature end of jpeg segment" issues
1383
+ don't end with ffd9, so this may be a useful diagnostic. High precision, very low recall
1384
+ for corrupt jpegs.
1385
+
1386
+ Set to None to use all modes.
1387
+
1388
+ Returns:
1389
+ dict: a dict with a key called 'file' (the value of [filename]), one key for each string in
1390
+ [modes] (a success indicator for that mode, specifically a string starting with either
1391
+ 'success' or 'error').
1392
+ """
1393
+
1394
+ if modes is None:
1395
+ modes = ('cv','pil','skimage','jpeg_trailer')
1396
+ else:
1397
+ if isinstance(modes,str):
1398
+ modes = [modes]
1399
+ for mode in modes:
1400
+ assert mode in ('cv','pil','skimage'), 'Unrecognized mode {}'.format(mode)
1401
+
1402
+ assert os.path.isfile(filename), 'Could not find file {}'.format(filename)
1403
+
1404
+ result = {}
1405
+ result['file'] = filename
1406
+
1407
+ for mode in modes:
1408
+
1409
+ result[mode] = 'unknown'
1410
+ if mode == 'pil':
1411
+ try:
1412
+ pil_im = load_image(filename) # noqa
1413
+ assert pil_im is not None
1414
+ result[mode] = 'success'
1415
+ except Exception as e:
1416
+ result[mode] = 'error: {}'.format(str(e))
1417
+ elif mode == 'cv':
1418
+ try:
1419
+ cv_im = cv2.imread(filename)
1420
+ assert cv_im is not None, 'Unknown opencv read failure'
1421
+ numpy_im = np.asarray(cv_im) # noqa
1422
+ result[mode] = 'success'
1423
+ except Exception as e:
1424
+ result[mode] = 'error: {}'.format(str(e))
1425
+ elif mode == 'skimage':
1426
+ try:
1427
+ # This is not a standard dependency
1428
+ from skimage import io as skimage_io # noqa
1429
+ except Exception:
1430
+ result[mode] = 'could not import skimage, run pip install scikit-image'
1431
+ return result
1432
+ try:
1433
+ skimage_im = skimage_io.imread(filename) # noqa
1434
+ assert skimage_im is not None
1435
+ result[mode] = 'success'
1436
+ except Exception as e:
1437
+ result[mode] = 'error: {}'.format(str(e))
1438
+ elif mode == 'jpeg_trailer':
1439
+ # https://stackoverflow.com/a/48282863/16644970
1440
+ try:
1441
+ with open(filename, 'rb') as f:
1442
+ check_chars = f.read()[-2:]
1443
+ if check_chars != b'\xff\xd9':
1444
+ result[mode] = 'invalid jpeg trailer: {}'.format(str(check_chars))
1445
+ else:
1446
+ result[mode] = 'success'
1447
+ except Exception as e:
1448
+ result[mode] = 'error: {}'.format(str(e))
1449
+
1450
+ # ...for each mode
1451
+
1452
+ return result
1453
+
1454
+ # ...def check_image_integrity(...)
1455
+
1456
+
1457
+ def parallel_check_image_integrity(filenames,
1458
+ modes=None,
1459
+ max_workers=16,
1460
+ use_threads=True,
1461
+ recursive=True):
1462
+ """
1463
+ Check whether we can successfully load a list of images via OpenCV and/or PIL.
1464
+
1465
+ Args:
1466
+ filenames (list or str): a list of image filenames or a folder
1467
+ modes (list, optional): see check_image_integrity() for documentation on the [modes] parameter
1468
+ max_workers (int, optional): the number of parallel workers to use; set to <=1 to disable
1469
+ parallelization
1470
+ use_threads (bool, optional): whether to use threads (True) or processes (False) for
1471
+ parallelization
1472
+ recursive (bool, optional): if [filenames] is a folder, whether to search recursively for images.
1473
+ Ignored if [filenames] is a list.
1474
+
1475
+ Returns:
1476
+ list: a list of dicts, each with a key called 'file' (the value of [filename]), one key for
1477
+ each string in [modes] (a success indicator for that mode, specifically a string starting
1478
+ with either 'success' or 'error').
1479
+ """
1480
+
1481
+ n_workers = min(max_workers,len(filenames))
1482
+
1483
+ if isinstance(filenames,str) and os.path.isdir(filenames):
1484
+ filenames = find_images(filenames,recursive=recursive,return_relative_paths=False)
1485
+
1486
+ print('Checking image integrity for {} filenames'.format(len(filenames)))
1487
+
1488
+ if n_workers <= 1:
1489
+
1490
+ results = []
1491
+ for filename in filenames:
1492
+ results.append(check_image_integrity(filename,modes=modes))
1493
+
1494
+ else:
1495
+
1496
+ if use_threads:
1497
+ pool = ThreadPool(n_workers)
1498
+ else:
1499
+ pool = Pool(n_workers)
1500
+
1501
+ results = list(tqdm(pool.imap(
1502
+ partial(check_image_integrity,modes=modes),filenames), total=len(filenames)))
1503
+
1504
+ return results
1505
+
1506
+
1218
1507
  #%% Test drivers
1219
1508
 
1220
1509
  if False:
@@ -1228,4 +1517,21 @@ if False:
1228
1517
 
1229
1518
  resize_results = resize_image_folder(input_folder,output_folder,
1230
1519
  target_width=1280,verbose=True,quality=85,no_enlarge_width=True,
1231
- pool_type='process',n_workers=10)
1520
+ pool_type='process',n_workers=10)
1521
+
1522
+
1523
+ #%% Integrity checking test
1524
+
1525
+ from md_utils import md_tests
1526
+ options = md_tests.download_test_data()
1527
+ folder = options.scratch_dir
1528
+
1529
+ results = parallel_check_image_integrity(folder,max_workers=8)
1530
+
1531
+ modes = ['cv','pil','skimage','jpeg_trailer']
1532
+
1533
+ for r in results:
1534
+ for mode in modes:
1535
+ if r[mode] != 'success':
1536
+ s = r[mode]
1537
+ print('Mode {} failed for {}:\n{}\n'.format(mode,r['file'],s))