megadetector 5.0.7__py3-none-any.whl → 5.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (191) hide show
  1. api/__init__.py +0 -0
  2. api/batch_processing/__init__.py +0 -0
  3. api/batch_processing/api_core/__init__.py +0 -0
  4. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. api/batch_processing/api_core/batch_service/score.py +0 -1
  6. api/batch_processing/api_core/server_job_status_table.py +0 -1
  7. api/batch_processing/api_core_support/__init__.py +0 -0
  8. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
  9. api/batch_processing/api_support/__init__.py +0 -0
  10. api/batch_processing/api_support/summarize_daily_activity.py +0 -1
  11. api/batch_processing/data_preparation/__init__.py +0 -0
  12. api/batch_processing/data_preparation/manage_local_batch.py +93 -79
  13. api/batch_processing/data_preparation/manage_video_batch.py +8 -8
  14. api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
  15. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
  16. api/batch_processing/postprocessing/__init__.py +0 -0
  17. api/batch_processing/postprocessing/add_max_conf.py +12 -12
  18. api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
  19. api/batch_processing/postprocessing/combine_api_outputs.py +69 -55
  20. api/batch_processing/postprocessing/compare_batch_results.py +114 -44
  21. api/batch_processing/postprocessing/convert_output_format.py +62 -19
  22. api/batch_processing/postprocessing/load_api_results.py +17 -20
  23. api/batch_processing/postprocessing/md_to_coco.py +31 -21
  24. api/batch_processing/postprocessing/md_to_labelme.py +165 -68
  25. api/batch_processing/postprocessing/merge_detections.py +40 -15
  26. api/batch_processing/postprocessing/postprocess_batch_results.py +270 -186
  27. api/batch_processing/postprocessing/remap_detection_categories.py +170 -0
  28. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +75 -39
  29. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
  30. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
  31. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +244 -160
  32. api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
  33. api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
  34. api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
  35. api/synchronous/__init__.py +0 -0
  36. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  37. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
  38. api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
  39. api/synchronous/api_core/animal_detection_api/config.py +35 -35
  40. api/synchronous/api_core/tests/__init__.py +0 -0
  41. api/synchronous/api_core/tests/load_test.py +109 -109
  42. classification/__init__.py +0 -0
  43. classification/aggregate_classifier_probs.py +21 -24
  44. classification/analyze_failed_images.py +11 -13
  45. classification/cache_batchapi_outputs.py +51 -51
  46. classification/create_classification_dataset.py +69 -68
  47. classification/crop_detections.py +54 -53
  48. classification/csv_to_json.py +97 -100
  49. classification/detect_and_crop.py +105 -105
  50. classification/evaluate_model.py +43 -42
  51. classification/identify_mislabeled_candidates.py +47 -46
  52. classification/json_to_azcopy_list.py +10 -10
  53. classification/json_validator.py +72 -71
  54. classification/map_classification_categories.py +44 -43
  55. classification/merge_classification_detection_output.py +68 -68
  56. classification/prepare_classification_script.py +157 -154
  57. classification/prepare_classification_script_mc.py +228 -228
  58. classification/run_classifier.py +27 -26
  59. classification/save_mislabeled.py +30 -30
  60. classification/train_classifier.py +20 -20
  61. classification/train_classifier_tf.py +21 -22
  62. classification/train_utils.py +10 -10
  63. data_management/__init__.py +0 -0
  64. data_management/annotations/__init__.py +0 -0
  65. data_management/annotations/annotation_constants.py +18 -31
  66. data_management/camtrap_dp_to_coco.py +238 -0
  67. data_management/cct_json_utils.py +107 -59
  68. data_management/cct_to_md.py +176 -158
  69. data_management/cct_to_wi.py +247 -219
  70. data_management/coco_to_labelme.py +272 -0
  71. data_management/coco_to_yolo.py +86 -62
  72. data_management/databases/__init__.py +0 -0
  73. data_management/databases/add_width_and_height_to_db.py +20 -16
  74. data_management/databases/combine_coco_camera_traps_files.py +35 -31
  75. data_management/databases/integrity_check_json_db.py +130 -83
  76. data_management/databases/subset_json_db.py +25 -16
  77. data_management/generate_crops_from_cct.py +27 -45
  78. data_management/get_image_sizes.py +188 -144
  79. data_management/importers/add_nacti_sizes.py +8 -8
  80. data_management/importers/add_timestamps_to_icct.py +78 -78
  81. data_management/importers/animl_results_to_md_results.py +158 -160
  82. data_management/importers/auckland_doc_test_to_json.py +9 -9
  83. data_management/importers/auckland_doc_to_json.py +8 -8
  84. data_management/importers/awc_to_json.py +7 -7
  85. data_management/importers/bellevue_to_json.py +15 -15
  86. data_management/importers/cacophony-thermal-importer.py +13 -13
  87. data_management/importers/carrizo_shrubfree_2018.py +8 -8
  88. data_management/importers/carrizo_trail_cam_2017.py +8 -8
  89. data_management/importers/cct_field_adjustments.py +9 -9
  90. data_management/importers/channel_islands_to_cct.py +10 -10
  91. data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
  92. data_management/importers/ena24_to_json.py +7 -7
  93. data_management/importers/filenames_to_json.py +8 -8
  94. data_management/importers/helena_to_cct.py +7 -7
  95. data_management/importers/idaho-camera-traps.py +7 -7
  96. data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
  97. data_management/importers/jb_csv_to_json.py +9 -9
  98. data_management/importers/mcgill_to_json.py +8 -8
  99. data_management/importers/missouri_to_json.py +18 -18
  100. data_management/importers/nacti_fieldname_adjustments.py +10 -10
  101. data_management/importers/noaa_seals_2019.py +8 -8
  102. data_management/importers/pc_to_json.py +7 -7
  103. data_management/importers/plot_wni_giraffes.py +7 -7
  104. data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
  105. data_management/importers/prepare_zsl_imerit.py +7 -7
  106. data_management/importers/rspb_to_json.py +8 -8
  107. data_management/importers/save_the_elephants_survey_A.py +8 -8
  108. data_management/importers/save_the_elephants_survey_B.py +9 -9
  109. data_management/importers/snapshot_safari_importer.py +26 -26
  110. data_management/importers/snapshot_safari_importer_reprise.py +665 -665
  111. data_management/importers/snapshot_serengeti_lila.py +14 -14
  112. data_management/importers/sulross_get_exif.py +8 -9
  113. data_management/importers/timelapse_csv_set_to_json.py +11 -11
  114. data_management/importers/ubc_to_json.py +13 -13
  115. data_management/importers/umn_to_json.py +7 -7
  116. data_management/importers/wellington_to_json.py +8 -8
  117. data_management/importers/wi_to_json.py +9 -9
  118. data_management/importers/zamba_results_to_md_results.py +181 -181
  119. data_management/labelme_to_coco.py +309 -159
  120. data_management/labelme_to_yolo.py +103 -60
  121. data_management/lila/__init__.py +0 -0
  122. data_management/lila/add_locations_to_island_camera_traps.py +9 -9
  123. data_management/lila/add_locations_to_nacti.py +147 -147
  124. data_management/lila/create_lila_blank_set.py +114 -31
  125. data_management/lila/create_lila_test_set.py +8 -8
  126. data_management/lila/create_links_to_md_results_files.py +106 -106
  127. data_management/lila/download_lila_subset.py +92 -90
  128. data_management/lila/generate_lila_per_image_labels.py +56 -43
  129. data_management/lila/get_lila_annotation_counts.py +18 -15
  130. data_management/lila/get_lila_image_counts.py +11 -11
  131. data_management/lila/lila_common.py +103 -70
  132. data_management/lila/test_lila_metadata_urls.py +132 -116
  133. data_management/ocr_tools.py +173 -128
  134. data_management/read_exif.py +161 -99
  135. data_management/remap_coco_categories.py +84 -0
  136. data_management/remove_exif.py +58 -62
  137. data_management/resize_coco_dataset.py +32 -44
  138. data_management/wi_download_csv_to_coco.py +246 -0
  139. data_management/yolo_output_to_md_output.py +86 -73
  140. data_management/yolo_to_coco.py +535 -95
  141. detection/__init__.py +0 -0
  142. detection/detector_training/__init__.py +0 -0
  143. detection/process_video.py +85 -33
  144. detection/pytorch_detector.py +43 -25
  145. detection/run_detector.py +157 -72
  146. detection/run_detector_batch.py +189 -114
  147. detection/run_inference_with_yolov5_val.py +118 -51
  148. detection/run_tiled_inference.py +113 -42
  149. detection/tf_detector.py +51 -28
  150. detection/video_utils.py +606 -521
  151. docs/source/conf.py +43 -0
  152. md_utils/__init__.py +0 -0
  153. md_utils/azure_utils.py +9 -9
  154. md_utils/ct_utils.py +249 -70
  155. md_utils/directory_listing.py +59 -64
  156. md_utils/md_tests.py +968 -862
  157. md_utils/path_utils.py +655 -155
  158. md_utils/process_utils.py +157 -133
  159. md_utils/sas_blob_utils.py +20 -20
  160. md_utils/split_locations_into_train_val.py +45 -32
  161. md_utils/string_utils.py +33 -10
  162. md_utils/url_utils.py +208 -27
  163. md_utils/write_html_image_list.py +51 -35
  164. md_visualization/__init__.py +0 -0
  165. md_visualization/plot_utils.py +102 -109
  166. md_visualization/render_images_with_thumbnails.py +34 -34
  167. md_visualization/visualization_utils.py +908 -311
  168. md_visualization/visualize_db.py +109 -58
  169. md_visualization/visualize_detector_output.py +61 -42
  170. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/METADATA +21 -17
  171. megadetector-5.0.9.dist-info/RECORD +224 -0
  172. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/WHEEL +1 -1
  173. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
  174. taxonomy_mapping/__init__.py +0 -0
  175. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
  176. taxonomy_mapping/map_new_lila_datasets.py +154 -154
  177. taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
  178. taxonomy_mapping/preview_lila_taxonomy.py +591 -591
  179. taxonomy_mapping/retrieve_sample_image.py +12 -12
  180. taxonomy_mapping/simple_image_download.py +11 -11
  181. taxonomy_mapping/species_lookup.py +10 -10
  182. taxonomy_mapping/taxonomy_csv_checker.py +18 -18
  183. taxonomy_mapping/taxonomy_graph.py +47 -47
  184. taxonomy_mapping/validate_lila_category_mappings.py +83 -76
  185. data_management/cct_json_to_filename_json.py +0 -89
  186. data_management/cct_to_csv.py +0 -140
  187. data_management/databases/remove_corrupted_images_from_db.py +0 -191
  188. detection/detector_training/copy_checkpoints.py +0 -43
  189. md_visualization/visualize_megadb.py +0 -183
  190. megadetector-5.0.7.dist-info/RECORD +0 -202
  191. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0
@@ -1,29 +1,39 @@
1
- ########
2
- #
3
- # visualization_utils.py
4
- #
5
- # Core rendering functions shared across visualization scripts
6
- #
7
- ########
1
+ """
2
+
3
+ visualization_utils.py
4
+
5
+ Rendering functions shared across visualization scripts
6
+
7
+ """
8
8
 
9
9
  #%% Constants and imports
10
10
 
11
- from io import BytesIO
12
- from typing import Union
13
11
  import time
14
-
15
- import matplotlib.pyplot as plt
16
12
  import numpy as np
17
13
  import requests
14
+ import os
15
+ import cv2
16
+
17
+ from io import BytesIO
18
18
  from PIL import Image, ImageFile, ImageFont, ImageDraw
19
+ from multiprocessing.pool import ThreadPool
20
+ from multiprocessing.pool import Pool
21
+ from tqdm import tqdm
22
+ from functools import partial
23
+
24
+ from md_utils.path_utils import find_images
19
25
 
20
26
  from data_management.annotations import annotation_constants
21
27
  from data_management.annotations.annotation_constants import (
22
- detector_bbox_category_id_to_name) # here id is int
28
+ detector_bbox_category_id_to_name)
23
29
 
24
30
  ImageFile.LOAD_TRUNCATED_IMAGES = True
25
31
 
26
- IMAGE_ROTATIONS = {
32
+ # Maps EXIF standard rotation identifiers to degrees. The value "1" indicates no
33
+ # rotation; this will be ignored. The values 2, 4, 5, and 7 are mirrored rotations,
34
+ # which are not supported (we'll assert() on this when we apply rotations).
35
+ EXIF_IMAGE_NO_ROTATION = 1
36
+ EXIF_IMAGE_ROTATIONS = {
27
37
  3: 180,
28
38
  6: 270,
29
39
  8: 90
@@ -32,23 +42,54 @@ IMAGE_ROTATIONS = {
32
42
  TEXTALIGN_LEFT = 0
33
43
  TEXTALIGN_RIGHT = 1
34
44
 
35
- # convert category ID from int to str
45
+ # Convert category ID from int to str
36
46
  DEFAULT_DETECTOR_LABEL_MAP = {
37
47
  str(k): v for k, v in detector_bbox_category_id_to_name.items()
38
48
  }
39
49
 
40
- # Retry on blob storage read failures
50
+ # Constants controlling retry behavior when fetching images from URLs
41
51
  n_retries = 10
42
52
  retry_sleep_time = 0.01
53
+
54
+ # If we try to open an image from a URL, and we encounter any error in this list,
55
+ # we'll retry, otherwise it's just an error.
43
56
  error_names_for_retry = ['ConnectionError']
44
57
 
45
58
  DEFAULT_BOX_THICKNESS = 4
46
59
  DEFAULT_LABEL_FONT_SIZE = 16
47
60
 
61
+ # Default color map for mapping integer category IDs to colors when rendering bounding
62
+ # boxes
63
+ DEFAULT_COLORS = [
64
+ 'AliceBlue', 'Red', 'RoyalBlue', 'Gold', 'Chartreuse', 'Aqua', 'Azure',
65
+ 'Beige', 'Bisque', 'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue',
66
+ 'AntiqueWhite', 'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson',
67
+ 'Cyan', 'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange',
68
+ 'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet',
69
+ 'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite',
70
+ 'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'GoldenRod',
71
+ 'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 'Ivory', 'Khaki',
72
+ 'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 'LightBlue',
73
+ 'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey',
74
+ 'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue',
75
+ 'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime',
76
+ 'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid',
77
+ 'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen',
78
+ 'MediumTurquoise', 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin',
79
+ 'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed',
80
+ 'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed',
81
+ 'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple',
82
+ 'RosyBrown', 'Aquamarine', 'SaddleBrown', 'Green', 'SandyBrown',
83
+ 'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue',
84
+ 'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue', 'GreenYellow',
85
+ 'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', 'Wheat', 'White',
86
+ 'WhiteSmoke', 'Yellow', 'YellowGreen'
87
+ ]
88
+
48
89
 
49
90
  #%% Functions
50
91
 
51
- def open_image(input_file: Union[str, BytesIO]) -> Image:
92
+ def open_image(input_file, ignore_exif_rotation=False):
52
93
  """
53
94
  Opens an image in binary format using PIL.Image and converts to RGB mode.
54
95
 
@@ -56,14 +97,16 @@ def open_image(input_file: Union[str, BytesIO]) -> Image:
56
97
 
57
98
  This operation is lazy; image will not be actually loaded until the first
58
99
  operation that needs to load it (for example, resizing), so file opening
59
- errors can show up later.
100
+ errors can show up later. load_image() is the non-lazy version of this function.
60
101
 
61
102
  Args:
62
- input_file: str or BytesIO, either a path to an image file (anything
63
- that PIL can open), or an image as a stream of bytes
103
+ input_file (str or BytesIO): can be a path to an image file (anything
104
+ that PIL can open), a URL, or an image as a stream of bytes
105
+ ignore_exif_rotation (bool, optional): don't rotate the loaded pixels,
106
+ even if we are loading a JPEG and that JPEG says it should be rotated
64
107
 
65
108
  Returns:
66
- A PIL image object in RGB mode
109
+ PIL.Image.Image: A PIL Image object in RGB mode
67
110
  """
68
111
 
69
112
  if (isinstance(input_file, str)
@@ -94,6 +137,8 @@ def open_image(input_file: Union[str, BytesIO]) -> Image:
94
137
 
95
138
  else:
96
139
  image = Image.open(input_file)
140
+
141
+ # Convert to RGB if necessary
97
142
  if image.mode not in ('RGBA', 'RGB', 'L', 'I;16'):
98
143
  raise AttributeError(
99
144
  f'Image {input_file} uses unsupported mode {image.mode}')
@@ -101,25 +146,30 @@ def open_image(input_file: Union[str, BytesIO]) -> Image:
101
146
  # PIL.Image.convert() returns a converted copy of this image
102
147
  image = image.convert(mode='RGB')
103
148
 
104
- # Alter orientation as needed according to EXIF tag 0x112 (274) for Orientation
105
- #
106
- # https://gist.github.com/dangtrinhnt/a577ece4cbe5364aad28
107
- # https://www.media.mit.edu/pia/Research/deepview/exif.html
108
- #
109
- try:
110
- exif = image._getexif()
111
- orientation: int = exif.get(274, None) # 274 is the key for the Orientation field
112
- if orientation is not None and orientation in IMAGE_ROTATIONS:
113
- image = image.rotate(IMAGE_ROTATIONS[orientation], expand=True) # returns a rotated copy
114
- except Exception:
115
- pass
149
+ if not ignore_exif_rotation:
150
+ # Alter orientation as needed according to EXIF tag 0x112 (274) for Orientation
151
+ #
152
+ # https://gist.github.com/dangtrinhnt/a577ece4cbe5364aad28
153
+ # https://www.media.mit.edu/pia/Research/deepview/exif.html
154
+ #
155
+ try:
156
+ exif = image._getexif()
157
+ orientation: int = exif.get(274, None)
158
+ if (orientation is not None) and (orientation != EXIF_IMAGE_NO_ROTATION):
159
+ assert orientation in EXIF_IMAGE_ROTATIONS, \
160
+ 'Mirrored rotations are not supported'
161
+ image = image.rotate(EXIF_IMAGE_ROTATIONS[orientation], expand=True)
162
+ except Exception:
163
+ pass
116
164
 
117
165
  return image
118
166
 
167
+ # ...def open_image(...)
168
+
119
169
 
120
- def exif_preserving_save(pil_image,output_file):
170
+ def exif_preserving_save(pil_image,output_file,quality='keep',default_quality=85,verbose=False):
121
171
  """
122
- Save [pil_image] to [output_file], making a moderate attempt to preserve EXIF
172
+ Saves [pil_image] to [output_file], making a moderate attempt to preserve EXIF
123
173
  data and JPEG quality. Neither is guaranteed.
124
174
 
125
175
  Also see:
@@ -127,57 +177,106 @@ def exif_preserving_save(pil_image,output_file):
127
177
  https://discuss.dizzycoding.com/determining-jpg-quality-in-python-pil/
128
178
 
129
179
  ...for more ways to preserve jpeg quality if quality='keep' doesn't do the trick.
180
+
181
+ Args:
182
+ pil_image (Image): the PIL Image objct to save
183
+ output_file (str): the destination file
184
+ quality (str or int, optional): can be "keep" (default), or an integer from 0 to 100.
185
+ This is only used if PIL thinks the the source image is a JPEG. If you load a JPEG
186
+ and resize it in memory, for example, it's no longer a JPEG.
187
+ default_quality (int, optional): determines output quality when quality == 'keep' and we are
188
+ saving a non-JPEG source to a JPEG file
189
+ verbose (bool, optional): enable additional debug console output
130
190
  """
131
191
 
132
192
  # Read EXIF metadata
133
193
  exif = pil_image.info['exif'] if ('exif' in pil_image.info) else None
134
194
 
135
- # Write output with EXIF metadata if available, and quality='keep' if this is a JPEG
136
- # image. Unfortunately, neither parameter likes "None", so we get a slightly
137
- # icky cascade of if's here.
138
- if exif is not None:
139
- if pil_image.format == "JPEG":
140
- pil_image.save(output_file, exif=exif, quality='keep')
195
+ # Quality preservation is only supported for JPEG sources.
196
+ if pil_image.format != "JPEG":
197
+ if quality == 'keep':
198
+ if verbose:
199
+ print('Warning: quality "keep" passed when saving a non-JPEG source (during save to {})'.format(
200
+ output_file))
201
+ quality = default_quality
202
+
203
+ # Some output formats don't support the quality parameter, so we try once with,
204
+ # and once without. This is a horrible cascade of if's, but it's a consequence of
205
+ # the fact that "None" is not supported for either "exif" or "quality".
206
+
207
+ try:
208
+
209
+ if exif is not None:
210
+ pil_image.save(output_file, exif=exif, quality=quality)
141
211
  else:
142
- pil_image.save(output_file, exif=exif)
143
- else:
144
- if pil_image.format == "JPEG":
145
- pil_image.save(output_file, quality='keep')
212
+ pil_image.save(output_file, quality=quality)
213
+
214
+ except Exception:
215
+
216
+ if verbose:
217
+ print('Warning: failed to write {}, trying again without quality parameter'.format(output_file))
218
+ if exif is not None:
219
+ pil_image.save(output_file, exif=exif)
146
220
  else:
147
221
  pil_image.save(output_file)
148
222
 
149
-
150
- def load_image(input_file: Union[str, BytesIO]) -> Image:
151
- """
152
- Loads the image at input_file as a PIL Image into memory.
223
+ # ...def exif_preserving_save(...)
153
224
 
154
- Image.open() used in open_image() is lazy and errors will occur downstream
155
- if not explicitly loaded.
156
225
 
226
+ def load_image(input_file, ignore_exif_rotation=False):
227
+ """
228
+ Loads an image file. This is the non-lazy version of open_file(); i.e.,
229
+ it forces image decoding before returning.
230
+
157
231
  Args:
158
- input_file: str or BytesIO, either a path to an image file (anything
159
- that PIL can open), or an image as a stream of bytes
232
+ input_file (str or BytesIO): can be a path to an image file (anything
233
+ that PIL can open), a URL, or an image as a stream of bytes
234
+ ignore_exif_rotation (bool, optional): don't rotate the loaded pixels,
235
+ even if we are loading a JPEG and that JPEG says it should be rotated
160
236
 
161
- Returns: PIL.Image.Image, in RGB mode
237
+ Returns:
238
+ PIL.Image.Image: a PIL Image object in RGB mode
162
239
  """
163
240
 
164
- image = open_image(input_file)
241
+ image = open_image(input_file, ignore_exif_rotation=ignore_exif_rotation)
165
242
  image.load()
166
243
  return image
167
244
 
168
245
 
169
- def resize_image(image, target_width, target_height=-1, output_file=None):
246
+ def resize_image(image, target_width=-1, target_height=-1, output_file=None,
247
+ no_enlarge_width=False, verbose=False, quality='keep'):
170
248
  """
171
- Resizes a PIL image object to the specified width and height; does not resize
249
+ Resizes a PIL Image object to the specified width and height; does not resize
172
250
  in place. If either width or height are -1, resizes with aspect ratio preservation.
173
- If both are -1, returns the original image (does not copy in this case).
174
251
 
175
- None is equivalent to -1 for target_width and target_height.
252
+ If target_width and target_height are both -1, does not modify the image, but
253
+ will write to output_file if supplied.
254
+
255
+ If no resizing is required, and an Image object is supplied, returns the original Image
256
+ object (i.e., does not copy).
176
257
 
177
- [image] can be a PIL image or a filename.
258
+ Args:
259
+ image (Image or str): PIL Image object or a filename (local file or URL)
260
+ target_width (int, optional): width to which we should resize this image, or -1
261
+ to let target_height determine the size
262
+ target_height (int, optional): height to which we should resize this image, or -1
263
+ to let target_width determine the size
264
+ output_file (str, optional): file to which we should save this image; if None,
265
+ just returns the image without saving
266
+ no_enlarge_width (bool, optional): if [no_enlarge_width] is True, and
267
+ [target width] is larger than the original image width, does not modify the image,
268
+ but will write to output_file if supplied
269
+ verbose (bool, optional): enable additional debug output
270
+ quality (str or int, optional): passed to exif_preserving_save, see docs for more detail
271
+
272
+ returns:
273
+ PIL.Image.Image: the resized image, which may be the original image if no resizing is
274
+ required
178
275
  """
179
276
 
277
+ image_fn = 'in_memory'
180
278
  if isinstance(image,str):
279
+ image_fn = image
181
280
  image = load_image(image)
182
281
 
183
282
  if target_width is None:
@@ -185,11 +284,15 @@ def resize_image(image, target_width, target_height=-1, output_file=None):
185
284
 
186
285
  if target_height is None:
187
286
  target_height = -1
287
+
288
+ resize_required = True
188
289
 
189
- # Null operation
290
+ # No resize was requested, this is always a no-op
190
291
  if target_width == -1 and target_height == -1:
191
- return image
192
-
292
+
293
+ resize_required = False
294
+
295
+ # Does either dimension need to scale according to the other?
193
296
  elif target_width == -1 or target_height == -1:
194
297
 
195
298
  # Aspect ratio as width over height
@@ -202,76 +305,63 @@ def resize_image(image, target_width, target_height=-1, output_file=None):
202
305
  else:
203
306
  # w = ar * h
204
307
  target_width = int(aspect_ratio * target_height)
205
-
206
- # This parameter changed between Pillow versions 9 and 10, and for a bit, I'd like to
207
- # support both.
308
+
309
+ # If we're not enlarging images and this would be an enlarge operation
310
+ if (no_enlarge_width) and (target_width > image.size[0]):
311
+
312
+ if verbose:
313
+ print('Bypassing image enlarge for {} --> {}'.format(
314
+ image_fn,str(output_file)))
315
+ resize_required = False
316
+
317
+ # If the target size is the same as the original size
318
+ if (target_width == image.size[0]) and (target_height == image.size[1]):
319
+
320
+ resize_required = False
321
+
322
+ if not resize_required:
323
+
324
+ if output_file is not None:
325
+ if verbose:
326
+ print('No resize required for resize {} --> {}'.format(
327
+ image_fn,str(output_file)))
328
+ exif_preserving_save(image,output_file,quality=quality,verbose=verbose)
329
+ return image
330
+
331
+ assert target_width > 0 and target_height > 0, \
332
+ 'Invalid image resize target {},{}'.format(target_width,target_height)
333
+
334
+ # The antialiasing parameter changed between Pillow versions 9 and 10, and for a bit,
335
+ # I'd like to support both.
208
336
  try:
209
337
  resized_image = image.resize((target_width, target_height), Image.ANTIALIAS)
210
338
  except:
211
339
  resized_image = image.resize((target_width, target_height), Image.Resampling.LANCZOS)
212
340
 
213
341
  if output_file is not None:
214
- exif_preserving_save(resized_image,output_file)
342
+ exif_preserving_save(resized_image,output_file,quality=quality,verbose=verbose)
215
343
 
216
344
  return resized_image
217
345
 
218
-
219
- def show_images_in_a_row(images):
220
-
221
- num = len(images)
222
- assert num > 0
223
-
224
- if isinstance(images[0], str):
225
- images = [Image.open(img) for img in images]
226
-
227
- fig, axarr = plt.subplots(1, num, squeeze=False) # number of rows, number of columns
228
- fig.set_size_inches((num * 5, 25)) # each image is 2 inches wide
229
- for i, img in enumerate(images):
230
- axarr[0, i].set_axis_off()
231
- axarr[0, i].imshow(img)
232
- return fig
233
-
234
-
235
- # The following three functions are modified versions of those at:
236
- #
237
- # https://github.com/tensorflow/models/blob/master/research/object_detection/utils/visualization_utils.py
238
-
239
- DEFAULT_COLORS = [
240
- 'AliceBlue', 'Red', 'RoyalBlue', 'Gold', 'Chartreuse', 'Aqua', 'Azure',
241
- 'Beige', 'Bisque', 'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue',
242
- 'AntiqueWhite', 'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson',
243
- 'Cyan', 'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange',
244
- 'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet',
245
- 'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite',
246
- 'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'GoldenRod',
247
- 'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 'Ivory', 'Khaki',
248
- 'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 'LightBlue',
249
- 'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey',
250
- 'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue',
251
- 'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime',
252
- 'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid',
253
- 'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen',
254
- 'MediumTurquoise', 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin',
255
- 'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed',
256
- 'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed',
257
- 'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple',
258
- 'RosyBrown', 'Aquamarine', 'SaddleBrown', 'Green', 'SandyBrown',
259
- 'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue',
260
- 'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue', 'GreenYellow',
261
- 'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', 'Wheat', 'White',
262
- 'WhiteSmoke', 'Yellow', 'YellowGreen'
263
- ]
346
+ # ...def resize_image(...)
264
347
 
265
348
 
266
349
  def crop_image(detections, image, confidence_threshold=0.15, expansion=0):
267
350
  """
268
- Crops detections above *confidence_threshold* from the PIL image *image*,
269
- returning a list of PIL images.
270
-
271
- *detections* should be a list of dictionaries with keys 'conf' and 'bbox';
272
- see bbox format description below. Normalized, [x,y,w,h], upper-left-origin.
351
+ Crops detections above [confidence_threshold] from the PIL image [image],
352
+ returning a list of PIL Images.
273
353
 
274
- *expansion* specifies a number of pixels to include on each side of the box.
354
+ Args:
355
+ detections (list): a list of dictionaries with keys 'conf' and 'bbox';
356
+ boxes are length-four arrays formatted as [x,y,w,h], normalized,
357
+ upper-left origin (this is the standard MD detection format)
358
+ image (Image): the PIL Image object from which we should crop detections
359
+ confidence_threshold (float, optional): only crop detections above this threshold
360
+ expansion (int, optional): a number of pixels to include on each side of a cropped
361
+ detection
362
+
363
+ Returns:
364
+ list: a possibly-empty list of PIL Image objects
275
365
  """
276
366
 
277
367
  ret_images = []
@@ -313,90 +403,112 @@ def crop_image(detections, image, confidence_threshold=0.15, expansion=0):
313
403
  return ret_images
314
404
 
315
405
 
316
- def render_detection_bounding_boxes(detections, image,
317
- label_map={},
406
+ def render_detection_bounding_boxes(detections,
407
+ image,
408
+ label_map='show_categories',
318
409
  classification_label_map=None,
319
- confidence_threshold=0.15, thickness=DEFAULT_BOX_THICKNESS, expansion=0,
410
+ confidence_threshold=0.15,
411
+ thickness=DEFAULT_BOX_THICKNESS,
412
+ expansion=0,
320
413
  classification_confidence_threshold=0.3,
321
414
  max_classifications=3,
322
- colormap=DEFAULT_COLORS,
415
+ colormap=None,
323
416
  textalign=TEXTALIGN_LEFT,
324
417
  label_font_size=DEFAULT_LABEL_FONT_SIZE,
325
418
  custom_strings=None):
326
419
  """
327
- Renders bounding boxes, label, and confidence on an image if confidence is above the threshold.
328
-
329
- Boxes are in the format that's output from the batch processing API.
330
-
420
+ Renders bounding boxes (with labels and confidence values) on an image for all
421
+ detections above a threshold.
422
+
331
423
  Renders classification labels if present.
424
+
425
+ [image] is modified in place.
332
426
 
333
427
  Args:
334
428
 
335
- detections: detections on the image, example content:
336
- [
337
- {
338
- "category": "2",
339
- "conf": 0.996,
340
- "bbox": [
341
- 0.0,
342
- 0.2762,
343
- 0.1234,
344
- 0.2458
345
- ]
346
- }
347
- ]
348
-
349
- ...where the bbox coordinates are [x, y, box_width, box_height].
350
-
351
- (0, 0) is the upper-left. Coordinates are normalized.
352
-
353
- Supports classification results, if *detections* has the format
354
- [
355
- {
356
- "category": "2",
357
- "conf": 0.996,
358
- "bbox": [
359
- 0.0,
360
- 0.2762,
361
- 0.1234,
362
- 0.2458
363
- ]
364
- "classifications": [
365
- ["3", 0.901],
366
- ["1", 0.071],
367
- ["4", 0.025]
368
- ]
369
- }
370
- ]
429
+ detections (list): list of detections in the MD output format, for example:
430
+
431
+ .. code-block:: none
432
+
433
+ [
434
+ {
435
+ "category": "2",
436
+ "conf": 0.996,
437
+ "bbox": [
438
+ 0.0,
439
+ 0.2762,
440
+ 0.1234,
441
+ 0.2458
442
+ ]
443
+ }
444
+ ]
445
+
446
+ ...where the bbox coordinates are [x, y, box_width, box_height].
447
+
448
+ (0, 0) is the upper-left. Coordinates are normalized.
449
+
450
+ Supports classification results, in the standard format:
451
+
452
+ .. code-block:: none
453
+
454
+ [
455
+ {
456
+ "category": "2",
457
+ "conf": 0.996,
458
+ "bbox": [
459
+ 0.0,
460
+ 0.2762,
461
+ 0.1234,
462
+ 0.2458
463
+ ]
464
+ "classifications": [
465
+ ["3", 0.901],
466
+ ["1", 0.071],
467
+ ["4", 0.025]
468
+ ]
469
+ }
470
+ ]
371
471
 
372
- image: PIL.Image object
472
+ image (PIL.Image.Image): image on which we should render detections
373
473
 
374
- label_map: optional, mapping the numerical label to a string name. The type of the numerical label
375
- (default string) needs to be consistent with the keys in label_map; no casting is carried out.
376
- If this is None, no labels are shown.
474
+ label_map (dict, optional): optional, mapping the numeric label to a string name. The type of the
475
+ numeric label (typically strings) needs to be consistent with the keys in label_map; no casting is
476
+ carried out. If [label_map] is None, no labels are shown (not even numbers and confidence values).
477
+ If you want category numbers and confidence values without class labels, use the default value,
478
+ the string 'show_categories'.
377
479
 
378
- classification_label_map: optional, mapping of the string class labels to the actual class names.
379
- The type of the numerical label (default string) needs to be consistent with the keys in
380
- label_map; no casting is carried out. If this is None, no classification labels are shown.
480
+ classification_label_map (dict, optional): optional, mapping of the string class labels to the actual
481
+ class names. The type of the numeric label (typically strings) needs to be consistent with the keys
482
+ in label_map; no casting is carried out. If [label_map] is None, no labels are shown (not even numbers
483
+ and confidence values).
381
484
 
382
- confidence_threshold: optional, threshold above which boxes are rendered. Can also be a dictionary
383
- mapping category IDs to thresholds.
485
+ confidence_threshold (float or dict, optional), threshold above which boxes are rendered. Can also be a
486
+ dictionary mapping category IDs to thresholds.
487
+
488
+ thickness (int, optional): line thickness in pixels
489
+
490
+ expansion (int, optional): number of pixels to expand bounding boxes on each side
384
491
 
385
- thickness: line thickness in pixels. Default value is 4.
492
+ classification_confidence_threshold (float, optional): confidence above which classification results
493
+ are displayed
386
494
 
387
- expansion: number of pixels to expand bounding boxes on each side. Default is 0.
495
+ max_classifications (int, optional): maximum number of classification results rendered for one image
388
496
 
389
- classification_confidence_threshold: confidence above which classification result is retained.
497
+ colormap (list, optional): list of color names, used to choose colors for categories by
498
+ indexing with the values in [classes]; defaults to a reasonable set of colors
390
499
 
391
- max_classifications: maximum number of classification results retained for one image.
500
+ textalign (int, optional): TEXTALIGN_LEFT or TEXTALIGN_RIGHT
501
+
502
+ label_font_size (float, optional): font size for labels
392
503
 
393
504
  custom_strings: optional set of strings to append to detection labels, should have the
394
- same length as [detections]. Appended before classification labels, if classification
395
- data is provided.
396
-
397
- image is modified in place.
505
+ same length as [detections]. Appended before any classification labels.
398
506
  """
399
507
 
508
+ # Input validation
509
+ if (label_map is not None) and (isinstance(label_map,str)) and (label_map == 'show_categories'):
510
+ label_map = {}
511
+
400
512
  if custom_strings is not None:
401
513
  assert len(custom_strings) == len(detections), \
402
514
  '{} custom strings provided for {} detections'.format(
@@ -417,8 +529,7 @@ def render_detection_bounding_boxes(detections, image,
417
529
  if isinstance(confidence_threshold,dict):
418
530
  rendering_threshold = confidence_threshold[detection['category']]
419
531
  else:
420
- rendering_threshold = confidence_threshold
421
-
532
+ rendering_threshold = confidence_threshold
422
533
 
423
534
  # Always render objects with a confidence of "None", this is typically used
424
535
  # for ground truth data.
@@ -429,7 +540,7 @@ def render_detection_bounding_boxes(detections, image,
429
540
  clss = detection['category']
430
541
 
431
542
  # {} is the default, which means "show labels with no mapping", so don't use "if label_map" here
432
- # if label_map:
543
+ # if label_map:
433
544
  if label_map is not None:
434
545
  label = label_map[clss] if clss in label_map else clss
435
546
  if score is not None:
@@ -491,6 +602,8 @@ def render_detection_bounding_boxes(detections, image,
491
602
  expansion=expansion, colormap=colormap, textalign=textalign,
492
603
  label_font_size=label_font_size)
493
604
 
605
+ # ...render_detection_bounding_boxes(...)
606
+
494
607
 
495
608
  def draw_bounding_boxes_on_image(image,
496
609
  boxes,
@@ -498,25 +611,30 @@ def draw_bounding_boxes_on_image(image,
498
611
  thickness=DEFAULT_BOX_THICKNESS,
499
612
  expansion=0,
500
613
  display_strs=None,
501
- colormap=DEFAULT_COLORS,
614
+ colormap=None,
502
615
  textalign=TEXTALIGN_LEFT,
503
616
  label_font_size=DEFAULT_LABEL_FONT_SIZE):
504
617
  """
505
- Draws bounding boxes on an image.
618
+ Draws bounding boxes on an image. Modifies the image in place.
506
619
 
507
620
  Args:
508
- image: a PIL.Image object.
509
- boxes: a 2 dimensional numpy array of [N, 4]: (ymin, xmin, ymax, xmax).
510
- The coordinates are in normalized format between [0, 1].
511
- classes: a list of ints or strings (that can be cast to ints) corresponding to the
512
- class labels of the boxes. This is only used for color selection.
513
- thickness: line thickness in pixels. Default value is 4.
514
- expansion: number of pixels to expand bounding boxes on each side. Default is 0.
515
- display_strs: list of list of strings.
516
- a list of strings for each bounding box.
517
- The reason to pass a list of strings for a
518
- bounding box is that it might contain
519
- multiple labels.
621
+
622
+ image (PIL.Image): the image on which we should draw boxes
623
+ boxes (np.array): a two-dimensional numpy array of size [N, 4], where N is the
624
+ number of boxes, and each row is (ymin, xmin, ymax, xmax). Coordinates should be
625
+ normalized to image height/width.
626
+ classes (list): a list of ints or string-formatted ints corresponding to the
627
+ class labels of the boxes. This is only used for color selection. Should have the same
628
+ length as [boxes].
629
+ thickness (int, optional): line thickness in pixels
630
+ expansion (int, optional): number of pixels to expand bounding boxes on each side
631
+ display_strs (list, optional): list of list of strings (the outer list should have the
632
+ same length as [boxes]). Typically this is used to show (possibly multiple) detection
633
+ or classification categories and/or confidence values.
634
+ colormap (list, optional): list of color names, used to choose colors for categories by
635
+ indexing with the values in [classes]; defaults to a reasonable set of colors
636
+ textalign (int, optional): TEXTALIGN_LEFT or TEXTALIGN_RIGHT
637
+ label_font_size (float, optional): font size for labels
520
638
  """
521
639
 
522
640
  boxes_shape = boxes.shape
@@ -537,6 +655,8 @@ def draw_bounding_boxes_on_image(image,
537
655
  textalign=textalign,
538
656
  label_font_size=label_font_size)
539
657
 
658
+ # ...draw_bounding_boxes_on_image(...)
659
+
540
660
 
541
661
  def draw_bounding_box_on_image(image,
542
662
  ymin,
@@ -546,13 +666,13 @@ def draw_bounding_box_on_image(image,
546
666
  clss=None,
547
667
  thickness=DEFAULT_BOX_THICKNESS,
548
668
  expansion=0,
549
- display_str_list=(),
669
+ display_str_list=None,
550
670
  use_normalized_coordinates=True,
551
671
  label_font_size=DEFAULT_LABEL_FONT_SIZE,
552
- colormap=DEFAULT_COLORS,
672
+ colormap=None,
553
673
  textalign=TEXTALIGN_LEFT):
554
674
  """
555
- Adds a bounding box to an image.
675
+ Adds a bounding box to an image. Modifies the image in place.
556
676
 
557
677
  Bounding box coordinates can be specified in either absolute (pixel) or
558
678
  normalized coordinates by setting the use_normalized_coordinates argument.
@@ -562,24 +682,38 @@ def draw_bounding_box_on_image(image,
562
682
  If the top of the bounding box extends to the edge of the image, the strings
563
683
  are displayed below the bounding box.
564
684
 
685
+ Adapted from:
686
+
687
+ https://github.com/tensorflow/models/blob/master/research/object_detection/utils/visualization_utils.py
688
+
565
689
  Args:
566
- image: a PIL.Image object.
567
- ymin: ymin of bounding box - upper left.
568
- xmin: xmin of bounding box.
569
- ymax: ymax of bounding box.
570
- xmax: xmax of bounding box.
571
- clss: str, the class of the object in this bounding box - will be cast to an int.
572
- thickness: line thickness. Default value is 4.
573
- expansion: number of pixels to expand bounding boxes on each side. Default is 0.
574
- display_str_list: list of strings to display in box
575
- (each to be shown on its own line).
576
- use_normalized_coordinates: If True (default), treat coordinates
577
- ymin, xmin, ymax, xmax as relative to the image. Otherwise treat
578
- coordinates as absolute.
579
- label_font_size: font size to attempt to load arial.ttf with
690
+ image (PIL.Image.Image): the image on which we should draw a box
691
+ ymin (float): ymin of bounding box
692
+ xmin (float): xmin of bounding box
693
+ ymax (float): ymax of bounding box
694
+ xmax (float): xmax of bounding box
695
+ clss (int, optional): the class index of the object in this bounding box, used for choosing
696
+ a color; should be either an integer or a string-formatted integer
697
+ thickness (int, optional): line thickness in pixels
698
+ expansion (int, optional): number of pixels to expand bounding boxes on each side
699
+ display_str_list (list, optional): list of strings to display above the box (each to be shown on its
700
+ own line)
701
+ use_normalized_coordinates (bool, optional): if True (default), treat coordinates
702
+ ymin, xmin, ymax, xmax as relative to the image, otherwise coordinates as absolute pixel values
703
+ label_font_size (float, optional): font size
704
+ colormap (list, optional): list of color names, used to choose colors for categories by
705
+ indexing with the values in [classes]; defaults to a reasonable set of colors
706
+ textalign (int, optional): TEXTALIGN_LEFT or TEXTALIGN_RIGHT
580
707
  """
581
708
 
709
+ if colormap is None:
710
+ colormap = DEFAULT_COLORS
711
+
712
+ if display_str_list is None:
713
+ display_str_list = []
714
+
582
715
  if clss is None:
716
+ # Default to the MegaDetector animal class ID (1)
583
717
  color = colormap[1]
584
718
  else:
585
719
  color = colormap[int(clss) % len(colormap)]
@@ -685,62 +819,32 @@ def draw_bounding_box_on_image(image,
685
819
 
686
820
  text_bottom -= (text_height + 2 * margin)
687
821
 
688
-
689
- def render_iMerit_boxes(boxes, classes, image,
690
- label_map=annotation_constants.annotation_bbox_category_id_to_name):
691
- """
692
- Renders bounding boxes and their category labels on a PIL image.
693
-
694
- Args:
695
- boxes: bounding box annotations from iMerit, format is:
696
- [x_rel, y_rel, w_rel, h_rel] (rel = relative coords)
697
- classes: the class IDs of the predicted class of each box/object
698
- image: PIL.Image object to annotate on
699
- label_map: optional dict mapping classes to a string for display
700
-
701
- Returns:
702
- image will be altered in place
703
- """
704
-
705
- display_boxes = []
706
-
707
- # list of lists, one list of strings for each bounding box (to accommodate multiple labels)
708
- display_strs = []
709
-
710
- for box, clss in zip(boxes, classes):
711
- if len(box) == 0:
712
- assert clss == 5
713
- continue
714
- x_rel, y_rel, w_rel, h_rel = box
715
- ymin, xmin = y_rel, x_rel
716
- ymax = ymin + h_rel
717
- xmax = xmin + w_rel
718
-
719
- display_boxes.append([ymin, xmin, ymax, xmax])
720
-
721
- if label_map:
722
- clss = label_map[int(clss)]
723
- display_strs.append([clss])
724
-
725
- display_boxes = np.array(display_boxes)
726
- draw_bounding_boxes_on_image(image, display_boxes, classes, display_strs=display_strs)
822
+ # ...def draw_bounding_box_on_image(...)
727
823
 
728
824
 
729
825
  def render_megadb_bounding_boxes(boxes_info, image):
730
826
  """
827
+ Render bounding boxes to an image, where those boxes are in the mostly-deprecated
828
+ MegaDB format, which looks like:
829
+
830
+ .. code-block::none
831
+
832
+ {
833
+ "category": "animal",
834
+ "bbox": [
835
+ 0.739,
836
+ 0.448,
837
+ 0.187,
838
+ 0.198
839
+ ]
840
+ }
841
+
731
842
  Args:
732
- boxes_info: list of dict, each dict represents a single detection
733
- {
734
- "category": "animal",
735
- "bbox": [
736
- 0.739,
737
- 0.448,
738
- 0.187,
739
- 0.198
740
- ]
741
- }
843
+ boxes_info (list): list of dicts, each dict represents a single detection
742
844
  where bbox coordinates are normalized [x_min, y_min, width, height]
743
- image: PIL.Image.Image, opened image
845
+ image (PIL.Image.Image): image to modify
846
+
847
+ :meta private:
744
848
  """
745
849
 
746
850
  display_boxes = []
@@ -758,16 +862,37 @@ def render_megadb_bounding_boxes(boxes_info, image):
758
862
  display_boxes = np.array(display_boxes)
759
863
  draw_bounding_boxes_on_image(image, display_boxes, classes, display_strs=display_strs)
760
864
 
865
+ # ...def render_iMerit_boxes(...)
866
+
761
867
 
762
- def render_db_bounding_boxes(boxes, classes, image, original_size=None,
763
- label_map=None, thickness=DEFAULT_BOX_THICKNESS, expansion=0):
868
+ def render_db_bounding_boxes(boxes,
869
+ classes,
870
+ image,
871
+ original_size=None,
872
+ label_map=None,
873
+ thickness=DEFAULT_BOX_THICKNESS,
874
+ expansion=0):
764
875
  """
765
- Render bounding boxes (with class labels) on [image]. This is a wrapper for
876
+ Render bounding boxes (with class labels) on an image. This is a wrapper for
766
877
  draw_bounding_boxes_on_image, allowing the caller to operate on a resized image
767
- by providing the original size of the image; bboxes will be scaled accordingly.
878
+ by providing the original size of the image; boxes will be scaled accordingly.
768
879
 
769
- This function assumes that bounding boxes are in the COCO camera traps format,
770
- with absolute coordinates.
880
+ This function assumes that bounding boxes are in absolute coordinates, typically
881
+ because they come from COCO camera traps .json files.
882
+
883
+ Args:
884
+ boxes (list): list of length-4 tuples, formatted as (x,y,w,h) (in pixels)
885
+ classes (list): list of ints (or string-formatted ints), used to choose labels (either
886
+ by literally rendering the class labels, or by indexing into [label_map])
887
+ image (PIL.Image.Image): image object to modify
888
+ original_size (tuple, optional): if this is not None, and the size is different than
889
+ the size of [image], we assume that [boxes] refer to the original size, and we scale
890
+ them accordingly before rendering
891
+ label_map (dict, optional): int --> str dictionary, typically mapping category IDs to
892
+ species labels; if None, category labels are rendered verbatim (typically as numbers)
893
+ thickness (int, optional): line width
894
+ expansion (int, optional): a number of pixels to include on each side of a cropped
895
+ detection
771
896
  """
772
897
 
773
898
  display_boxes = []
@@ -799,41 +924,59 @@ def render_db_bounding_boxes(boxes, classes, image, original_size=None,
799
924
  display_strs.append([str(clss)])
800
925
 
801
926
  display_boxes = np.array(display_boxes)
802
- draw_bounding_boxes_on_image(image, display_boxes, classes, display_strs=display_strs,
803
- thickness=thickness, expansion=expansion)
927
+
928
+ draw_bounding_boxes_on_image(image,
929
+ display_boxes,
930
+ classes,
931
+ display_strs=display_strs,
932
+ thickness=thickness,
933
+ expansion=expansion)
804
934
 
935
+ # ...def render_db_bounding_boxes(...)
805
936
 
806
- def draw_bounding_boxes_on_file(input_file, output_file, detections, confidence_threshold=0.0,
937
+
938
+ def draw_bounding_boxes_on_file(input_file,
939
+ output_file,
940
+ detections,
941
+ confidence_threshold=0.0,
807
942
  detector_label_map=DEFAULT_DETECTOR_LABEL_MAP,
808
- thickness=DEFAULT_BOX_THICKNESS, expansion=0,
809
- colormap=DEFAULT_COLORS,
943
+ thickness=DEFAULT_BOX_THICKNESS,
944
+ expansion=0,
945
+ colormap=None,
810
946
  label_font_size=DEFAULT_LABEL_FONT_SIZE,
811
- custom_strings=None,target_size=None):
947
+ custom_strings=None,
948
+ target_size=None,
949
+ ignore_exif_rotation=False):
812
950
  """
813
- Render detection bounding boxes on an image loaded from file, writing the results to a
814
- new image file.
815
-
816
- "detections" is in the API results format:
817
-
818
- [{"category": "2","conf": 0.996,"bbox": [0.0,0.2762,0.1234,0.2458]}]
819
-
820
- ...where the bbox is:
821
-
822
- [x_min, y_min, width_of_box, height_of_box]
823
-
824
- Normalized, with the origin at the upper-left.
825
-
826
- detector_label_map is a dict mapping category IDs to strings.
951
+ Renders detection bounding boxes on an image loaded from file, optionally writing the results to
952
+ a new image file.
827
953
 
828
- custom_strings: optional set of strings to append to detection labels, should have the
829
- same length as [detections]. Appended before classification labels, if classification
830
- data is provided.
831
-
832
- target_size: tuple of (target_width,target_height). Either or both can be -1,
833
- see resize_image for documentation. If None or (-1,-1), uses the original image size.
954
+ Args:
955
+ input_file (str): filename or URL to load
956
+ output_file (str, optional): filename to which we should write the rendered image
957
+ detections (list): a list of dictionaries with keys 'conf' and 'bbox';
958
+ boxes are length-four arrays formatted as [x,y,w,h], normalized,
959
+ upper-left origin (this is the standard MD detection format)
960
+ detector_label_map (dict, optional): a dict mapping category IDs to strings. If this
961
+ is None, no confidence values or identifiers are shown If this is {}, just category
962
+ indices and confidence values are shown.
963
+ thickness (int, optional): line width in pixels for box rendering
964
+ expansion (int, optional): box expansion in pixels
965
+ colormap (list, optional): list of color names, used to choose colors for categories by
966
+ indexing with the values in [classes]; defaults to a reasonable set of colors
967
+ label_font_size (float, optional): label font size
968
+ custom_strings (list, optional): set of strings to append to detection labels, should have the
969
+ same length as [detections]. Appended before any classification labels.
970
+ target_size (tuple, optional): tuple of (target_width,target_height). Either or both can be -1,
971
+ see resize_image() for documentation. If None or (-1,-1), uses the original image size.
972
+ ignore_exif_rotation (bool, optional): don't rotate the loaded pixels,
973
+ even if we are loading a JPEG and that JPEG says it should be rotated.
974
+
975
+ Returns:
976
+ PIL.Image.Image: loaded and modified image
834
977
  """
835
978
 
836
- image = open_image(input_file)
979
+ image = open_image(input_file, ignore_exif_rotation=ignore_exif_rotation)
837
980
 
838
981
  if target_size is not None:
839
982
  image = resize_image(image,target_size[0],target_size[1])
@@ -844,43 +987,72 @@ def draw_bounding_boxes_on_file(input_file, output_file, detections, confidence_
844
987
  thickness=thickness,expansion=expansion,colormap=colormap,
845
988
  custom_strings=custom_strings,label_font_size=label_font_size)
846
989
 
847
- image.save(output_file)
990
+ if output_file is not None:
991
+ image.save(output_file)
992
+
993
+ return image
848
994
 
849
995
 
850
- def draw_db_boxes_on_file(input_file, output_file, boxes, classes=None,
851
- label_map=None, thickness=DEFAULT_BOX_THICKNESS, expansion=0):
996
+ def draw_db_boxes_on_file(input_file,
997
+ output_file,
998
+ boxes,
999
+ classes=None,
1000
+ label_map=None,
1001
+ thickness=DEFAULT_BOX_THICKNESS,
1002
+ expansion=0,
1003
+ ignore_exif_rotation=False):
852
1004
  """
853
- Render COCO bounding boxes (in absolute coordinates) on an image loaded from file, writing the
854
- results to a new image file.
1005
+ Render COCO-formatted bounding boxes (in absolute coordinates) on an image loaded from file,
1006
+ writing the results to a new image file.
855
1007
 
856
- classes is a list of integer category IDs.
1008
+ Args:
1009
+ input_file (str): image file to read
1010
+ output_file (str): image file to write
1011
+ boxes (list): list of length-4 tuples, formatted as (x,y,w,h) (in pixels)
1012
+ classes (list, optional): list of ints (or string-formatted ints), used to choose
1013
+ labels (either by literally rendering the class labels, or by indexing into [label_map])
1014
+ label_map (dict, optional): int --> str dictionary, typically mapping category IDs to
1015
+ species labels; if None, category labels are rendered verbatim (typically as numbers)
1016
+ thickness (int, optional): line width
1017
+ expansion (int, optional): a number of pixels to include on each side of a cropped
1018
+ detection
1019
+ ignore_exif_rotation (bool, optional): don't rotate the loaded pixels,
1020
+ even if we are loading a JPEG and that JPEG says it should be rotated
857
1021
 
858
- detector_label_map is a dict mapping category IDs to strings.
1022
+ Returns:
1023
+ PIL.Image.Image: the loaded and modified image
859
1024
  """
860
1025
 
861
- image = open_image(input_file)
1026
+ image = open_image(input_file, ignore_exif_rotation=ignore_exif_rotation)
862
1027
 
863
1028
  if classes is None:
864
1029
  classes = [0] * len(boxes)
865
1030
 
866
1031
  render_db_bounding_boxes(boxes, classes, image, original_size=None,
867
1032
  label_map=label_map, thickness=thickness, expansion=expansion)
868
-
1033
+
869
1034
  image.save(output_file)
870
1035
 
1036
+ return image
1037
+
1038
+ # ...def draw_bounding_boxes_on_file(...)
1039
+
871
1040
 
872
1041
  def gray_scale_fraction(image,crop_size=(0.1,0.1)):
873
1042
  """
874
- Returns the fraction of the pixels in [image] that appear to be grayscale (R==G==B),
1043
+ Computes the fraction of the pixels in [image] that appear to be grayscale (R==G==B),
875
1044
  useful for approximating whether this is a night-time image when flash information is not
876
1045
  available in EXIF data (or for video frames, where this information is often not available
877
1046
  in structured metadata at all).
878
1047
 
879
- [image] can be a PIL image or a file name.
880
-
881
- crop_size should be a 2-element list/tuple, representing the fraction of the image
882
- to crop at the top and bottom, respectively, before analyzing (to minimize the possibility
883
- of including color elements in the image chrome).
1048
+ Args:
1049
+ image (str or PIL.Image.Image): Image, filename, or URL to analyze
1050
+ crop_size (optional): a 2-element list/tuple, representing the fraction of the
1051
+ image to crop at the top and bottom, respectively, before analyzing (to minimize
1052
+ the possibility of including color elements in the image overlay)
1053
+
1054
+ Returns:
1055
+ float: the fraction of pixels in [image] that appear to be grayscale (R==G==B)
884
1056
  """
885
1057
 
886
1058
  if isinstance(image,str):
@@ -938,3 +1110,428 @@ def gray_scale_fraction(image,crop_size=(0.1,0.1)):
938
1110
  r, g, b = image.getpixel((i,j))
939
1111
  if r == g and r == b and g == b:
940
1112
  n_gray_pixels += 1
1113
+
1114
+
1115
+ # ...def gray_scale_fraction(...)
1116
+
1117
+
1118
+ def _resize_relative_image(fn_relative,
1119
+ input_folder,output_folder,
1120
+ target_width,target_height,no_enlarge_width,verbose,quality):
1121
+ """
1122
+ Internal function for resizing an image from one folder to another,
1123
+ maintaining relative path.
1124
+ """
1125
+
1126
+ input_fn_abs = os.path.join(input_folder,fn_relative)
1127
+ output_fn_abs = os.path.join(output_folder,fn_relative)
1128
+ os.makedirs(os.path.dirname(output_fn_abs),exist_ok=True)
1129
+ try:
1130
+ _ = resize_image(input_fn_abs,
1131
+ output_file=output_fn_abs,
1132
+ target_width=target_width, target_height=target_height,
1133
+ no_enlarge_width=no_enlarge_width, verbose=verbose, quality=quality)
1134
+ status = 'success'
1135
+ error = None
1136
+ except Exception as e:
1137
+ if verbose:
1138
+ print('Error resizing {}: {}'.format(fn_relative,str(e)))
1139
+ status = 'error'
1140
+ error = str(e)
1141
+
1142
+ return {'fn_relative':fn_relative,'status':status,'error':error}
1143
+
1144
+ # ...def _resize_relative_image(...)
1145
+
1146
+
1147
+ def _resize_absolute_image(input_output_files,
1148
+ target_width,target_height,no_enlarge_width,verbose,quality):
1149
+
1150
+ """
1151
+ Internal wrapper for resize_image used in the context of a batch resize operation.
1152
+ """
1153
+
1154
+ input_fn_abs = input_output_files[0]
1155
+ output_fn_abs = input_output_files[1]
1156
+ os.makedirs(os.path.dirname(output_fn_abs),exist_ok=True)
1157
+ try:
1158
+ _ = resize_image(input_fn_abs,
1159
+ output_file=output_fn_abs,
1160
+ target_width=target_width, target_height=target_height,
1161
+ no_enlarge_width=no_enlarge_width, verbose=verbose, quality=quality)
1162
+ status = 'success'
1163
+ error = None
1164
+ except Exception as e:
1165
+ if verbose:
1166
+ print('Error resizing {}: {}'.format(input_fn_abs,str(e)))
1167
+ status = 'error'
1168
+ error = str(e)
1169
+
1170
+ return {'input_fn':input_fn_abs,'output_fn':output_fn_abs,status:'status',
1171
+ 'error':error}
1172
+
1173
+ # ..._resize_absolute_image(...)
1174
+
1175
+
1176
+ def resize_images(input_file_to_output_file,
1177
+ target_width=-1,
1178
+ target_height=-1,
1179
+ no_enlarge_width=False,
1180
+ verbose=False,
1181
+ quality='keep',
1182
+ pool_type='process',
1183
+ n_workers=10):
1184
+ """
1185
+ Resizes all images the dictionary [input_file_to_output_file].
1186
+
1187
+ TODO: This is a little more redundant with resize_image_folder than I would like;
1188
+ refactor resize_image_folder to call resize_images. Not doing that yet because
1189
+ at the time I'm writing this comment, a lot of code depends on resize_image_folder
1190
+ and I don't want to rock the boat yet.
1191
+
1192
+ Args:
1193
+ input_file_to_output_file (dict): dict mapping images that exist to the locations
1194
+ where the resized versions should be written
1195
+ target_width (int, optional): width to which we should resize this image, or -1
1196
+ to let target_height determine the size
1197
+ target_height (int, optional): height to which we should resize this image, or -1
1198
+ to let target_width determine the size
1199
+ no_enlarge_width (bool, optional): if [no_enlarge_width] is True, and
1200
+ [target width] is larger than the original image width, does not modify the image,
1201
+ but will write to output_file if supplied
1202
+ verbose (bool, optional): enable additional debug output
1203
+ quality (str or int, optional): passed to exif_preserving_save, see docs for more detail
1204
+ pool_type (str, optional): whether to use processes ('process') or threads ('thread') for
1205
+ parallelization; ignored if n_workers <= 1
1206
+ n_workers (int, optional): number of workers to use for parallel resizing; set to <=1
1207
+ to disable parallelization
1208
+
1209
+ Returns:
1210
+ list: a list of dicts with keys 'input_fn', 'output_fn', 'status', and 'error'.
1211
+ 'status' will be 'success' or 'error'; 'error' will be None for successful cases,
1212
+ otherwise will contain the image-specific error.
1213
+ """
1214
+
1215
+ assert pool_type in ('process','thread'), 'Illegal pool type {}'.format(pool_type)
1216
+
1217
+ input_output_file_pairs = []
1218
+
1219
+ # Reformat input files as (input,output) tuples
1220
+ for input_fn in input_file_to_output_file:
1221
+ input_output_file_pairs.append((input_fn,input_file_to_output_file[input_fn]))
1222
+
1223
+ if n_workers == 1:
1224
+
1225
+ results = []
1226
+ for i_o_file_pair in tqdm(input_output_file_pairs):
1227
+ results.append(_resize_absolute_image(i_o_file_pair,
1228
+ target_width=target_width,
1229
+ target_height=target_height,
1230
+ no_enlarge_width=no_enlarge_width,
1231
+ verbose=verbose,
1232
+ quality=quality))
1233
+
1234
+ else:
1235
+
1236
+ if pool_type == 'thread':
1237
+ pool = ThreadPool(n_workers); poolstring = 'threads'
1238
+ else:
1239
+ assert pool_type == 'process'
1240
+ pool = Pool(n_workers); poolstring = 'processes'
1241
+
1242
+ if verbose:
1243
+ print('Starting resizing pool with {} {}'.format(n_workers,poolstring))
1244
+
1245
+ p = partial(_resize_absolute_image,
1246
+ target_width=target_width,
1247
+ target_height=target_height,
1248
+ no_enlarge_width=no_enlarge_width,
1249
+ verbose=verbose,
1250
+ quality=quality)
1251
+
1252
+ results = list(tqdm(pool.imap(p, input_output_file_pairs),total=len(input_output_file_pairs)))
1253
+
1254
+ return results
1255
+
1256
+ # ...def resize_images(...)
1257
+
1258
+
1259
def resize_image_folder(input_folder,
                        output_folder=None,
                        target_width=-1,
                        target_height=-1,
                        no_enlarge_width=False,
                        verbose=False,
                        quality='keep',
                        pool_type='process',
                        n_workers=10,
                        recursive=True,
                        image_files_relative=None):
    """
    Resize all images in a folder (defaults to recursive).

    Defaults to in-place resizing (output_folder is optional).

    Args:
        input_folder (str): folder in which we should find images to resize
        output_folder (str, optional): folder in which we should write resized images.  If
            None, resizes images in place.  Otherwise, maintains relative paths in the target
            folder.
        target_width (int, optional): width to which we should resize this image, or -1
            to let target_height determine the size
        target_height (int, optional): height to which we should resize this image, or -1
            to let target_width determine the size
        no_enlarge_width (bool, optional): if [no_enlarge_width] is True, and
            [target width] is larger than the original image width, does not modify the image,
            but will write to output_file if supplied
        verbose (bool, optional): enable additional debug output
        quality (str or int, optional): passed to exif_preserving_save, see docs for more detail
        pool_type (str, optional): whether use use processes ('process') or threads ('thread') for
            parallelization; ignored if n_workers <= 1
        n_workers (int, optional): number of workers to use for parallel resizing; set to <=1
            to disable parallelization
        recursive (bool, optional): whether to search [input_folder] recursively for images.
        image_files_relative (list, optional): if not None, skips any relative paths not
            in this list.

    Returns:
        list: a list of dicts with keys 'input_fn', 'output_fn', 'status', and 'error'.
        'status' will be 'success' or 'error'; 'error' will be None for successful cases,
        otherwise will contain the image-specific error.
    """

    assert os.path.isdir(input_folder), '{} is not a folder'.format(input_folder)

    # No output folder == in-place resize
    if output_folder is None:
        output_folder = input_folder
    else:
        os.makedirs(output_folder,exist_ok=True)

    assert pool_type in ('process','thread'), 'Illegal pool type {}'.format(pool_type)

    if image_files_relative is None:

        if verbose:
            print('Enumerating images')

        image_files_relative = find_images(input_folder,recursive=recursive,
                                           return_relative_paths=True,convert_slashes=True)
        if verbose:
            print('Found {} images'.format(len(image_files_relative)))

    # The docstring promises that any value <= 1 disables parallelization; the previous
    # check (== 1) would crash on, e.g., n_workers=0 (Pool(0) raises ValueError).
    if n_workers <= 1:

        if verbose:
            print('Resizing images')

        results = []
        for fn_relative in tqdm(image_files_relative):
            results.append(_resize_relative_image(fn_relative,
                                                  input_folder=input_folder,
                                                  output_folder=output_folder,
                                                  target_width=target_width,
                                                  target_height=target_height,
                                                  no_enlarge_width=no_enlarge_width,
                                                  verbose=verbose,
                                                  quality=quality))

    else:

        if pool_type == 'thread':
            pool = ThreadPool(n_workers); poolstring = 'threads'
        else:
            assert pool_type == 'process'
            pool = Pool(n_workers); poolstring = 'processes'

        if verbose:
            print('Starting resizing pool with {} {}'.format(n_workers,poolstring))

        p = partial(_resize_relative_image,
                    input_folder=input_folder,
                    output_folder=output_folder,
                    target_width=target_width,
                    target_height=target_height,
                    no_enlarge_width=no_enlarge_width,
                    verbose=verbose,
                    quality=quality)

        try:
            results = list(tqdm(pool.imap(p, image_files_relative),
                                total=len(image_files_relative)))
        finally:
            # Don't leak worker processes/threads
            pool.close()
            pool.join()

    return results

# ...def resize_image_folder(...)
1363
+
1364
+
1365
+ #%% Image integrity checking functions
1366
+
1367
def check_image_integrity(filename,modes=None):
    """
    Check whether we can successfully load an image via OpenCV and/or PIL.

    Args:
        filename (str): the filename to evaluate
        modes (list, optional): a list containing one or more of:

            - 'cv'
            - 'pil'
            - 'skimage'
            - 'jpeg_trailer'

            'jpeg_trailer' checks that the binary data ends with ffd9.  It does not check whether
            the image is actually a jpeg, and even if it is, there are lots of reasons the image might not
            end with ffd9.  It's also true the JPEGs that cause "premature end of jpeg segment" issues
            don't end with ffd9, so this may be a useful diagnostic.  High precision, very low recall
            for corrupt jpegs.

            Set to None to use all modes.

    Returns:
        dict: a dict with a key called 'file' (the value of [filename]), one key for each string in
        [modes] (a success indicator for that mode, specifically a string starting with either
        'success' or 'error').
    """

    if modes is None:
        modes = ('cv','pil','skimage','jpeg_trailer')
    else:
        if isinstance(modes,str):
            modes = [modes]
        for mode in modes:
            # 'jpeg_trailer' was previously missing from this list, so explicitly
            # requesting it - even though it's part of the default set - raised an
            # AssertionError.
            assert mode in ('cv','pil','skimage','jpeg_trailer'), \
                'Unrecognized mode {}'.format(mode)

    assert os.path.isfile(filename), 'Could not find file {}'.format(filename)

    result = {}
    result['file'] = filename

    for mode in modes:

        result[mode] = 'unknown'
        if mode == 'pil':
            try:
                pil_im = load_image(filename) # noqa
                assert pil_im is not None
                result[mode] = 'success'
            except Exception as e:
                result[mode] = 'error: {}'.format(str(e))
        elif mode == 'cv':
            try:
                cv_im = cv2.imread(filename)
                assert cv_im is not None, 'Unknown opencv read failure'
                numpy_im = np.asarray(cv_im) # noqa
                result[mode] = 'success'
            except Exception as e:
                result[mode] = 'error: {}'.format(str(e))
        elif mode == 'skimage':
            try:
                # This is not a standard dependency
                from skimage import io as skimage_io # noqa
            except Exception:
                # Record the failure but still run any remaining modes; the previous
                # early return here skipped them, contradicting the documented
                # one-key-per-mode return contract.
                result[mode] = 'could not import skimage, run pip install scikit-image'
                continue
            try:
                skimage_im = skimage_io.imread(filename) # noqa
                assert skimage_im is not None
                result[mode] = 'success'
            except Exception as e:
                result[mode] = 'error: {}'.format(str(e))
        elif mode == 'jpeg_trailer':
            # https://stackoverflow.com/a/48282863/16644970
            try:
                with open(filename, 'rb') as f:
                    check_chars = f.read()[-2:]
                if check_chars != b'\xff\xd9':
                    result[mode] = 'invalid jpeg trailer: {}'.format(str(check_chars))
                else:
                    result[mode] = 'success'
            except Exception as e:
                result[mode] = 'error: {}'.format(str(e))

    # ...for each mode

    return result

# ...def check_image_integrity(...)
1455
+
1456
+
1457
def parallel_check_image_integrity(filenames,
                                   modes=None,
                                   max_workers=16,
                                   use_threads=True,
                                   recursive=True):
    """
    Check whether we can successfully load a list of images via OpenCV and/or PIL.

    Args:
        filenames (list or str): a list of image filenames or a folder
        modes (list): see check_image_integrity() for documentation on the [modes] parameter
        max_workers (int, optional): the number of parallel workers to use; set to <=1 to disable
            parallelization
        use_threads (bool, optional): whether to use threads (True) or processes (False) for
            parallelization
        recursive (bool, optional): if [filenames] is a folder, whether to search recursively for images.
            Ignored if [filenames] is a list.

    Returns:
        list: a list of dicts, each with a key called 'file' (the value of [filename]), one key for
        each string in [modes] (a success indicator for that mode, specifically a string starting
        with either 'success' or 'error').
    """

    # Expand a folder into its list of image files *before* sizing the worker pool;
    # previously n_workers was computed first, in which case len() measured the
    # length of the folder's path string rather than the number of images.
    if isinstance(filenames,str) and os.path.isdir(filenames):
        filenames = find_images(filenames,recursive=recursive,return_relative_paths=False)

    n_workers = min(max_workers,len(filenames))

    print('Checking image integrity for {} filenames'.format(len(filenames)))

    if n_workers <= 1:

        results = []
        for filename in filenames:
            results.append(check_image_integrity(filename,modes=modes))

    else:

        if use_threads:
            pool = ThreadPool(n_workers)
        else:
            pool = Pool(n_workers)

        try:
            results = list(tqdm(pool.imap(
                partial(check_image_integrity,modes=modes),filenames), total=len(filenames)))
        finally:
            # Don't leak worker processes/threads
            pool.close()
            pool.join()

    return results
1506
+
1507
#%% Test drivers

# Interactive, cell-based ("#%%") development/test code; guarded by "if False:" so
# it never runs on import, but individual cells can be executed in an IDE.
if False:

    #%% Recursive resize test

    # Imported here (not at module level) because this is interactive-only code
    from md_visualization.visualization_utils import resize_image_folder # noqa

    # NOTE(review): Windows-specific scratch paths; adjust before running
    input_folder = r"C:\temp\resize-test\in"
    output_folder = r"C:\temp\resize-test\out"

    # Resize everything in input_folder to width 1280 (preserving aspect ratio),
    # never enlarging, writing JPEGs at quality 85 via a 10-process pool
    resize_results = resize_image_folder(input_folder,output_folder,
        target_width=1280,verbose=True,quality=85,no_enlarge_width=True,
        pool_type='process',n_workers=10)


    #%% Integrity checking test

    # Download the standard MD test images and run integrity checks on that folder
    from md_utils import md_tests
    options = md_tests.download_test_data()
    folder = options.scratch_dir

    results = parallel_check_image_integrity(folder,max_workers=8)

    modes = ['cv','pil','skimage','jpeg_trailer']

    # Print every (file, mode) pair that did not load cleanly
    for r in results:
        for mode in modes:
            if r[mode] != 'success':
                s = r[mode]
                print('Mode {} failed for {}:\n{}\n'.format(mode,r['file'],s))