megadetector 10.0.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147)
  1. megadetector/__init__.py +0 -0
  2. megadetector/api/__init__.py +0 -0
  3. megadetector/api/batch_processing/integration/digiKam/setup.py +6 -0
  4. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +465 -0
  5. megadetector/api/batch_processing/integration/eMammal/test_scripts/config_template.py +5 -0
  6. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +125 -0
  7. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +55 -0
  8. megadetector/classification/__init__.py +0 -0
  9. megadetector/classification/aggregate_classifier_probs.py +108 -0
  10. megadetector/classification/analyze_failed_images.py +227 -0
  11. megadetector/classification/cache_batchapi_outputs.py +198 -0
  12. megadetector/classification/create_classification_dataset.py +626 -0
  13. megadetector/classification/crop_detections.py +516 -0
  14. megadetector/classification/csv_to_json.py +226 -0
  15. megadetector/classification/detect_and_crop.py +853 -0
  16. megadetector/classification/efficientnet/__init__.py +9 -0
  17. megadetector/classification/efficientnet/model.py +415 -0
  18. megadetector/classification/efficientnet/utils.py +608 -0
  19. megadetector/classification/evaluate_model.py +520 -0
  20. megadetector/classification/identify_mislabeled_candidates.py +152 -0
  21. megadetector/classification/json_to_azcopy_list.py +63 -0
  22. megadetector/classification/json_validator.py +696 -0
  23. megadetector/classification/map_classification_categories.py +276 -0
  24. megadetector/classification/merge_classification_detection_output.py +509 -0
  25. megadetector/classification/prepare_classification_script.py +194 -0
  26. megadetector/classification/prepare_classification_script_mc.py +228 -0
  27. megadetector/classification/run_classifier.py +287 -0
  28. megadetector/classification/save_mislabeled.py +110 -0
  29. megadetector/classification/train_classifier.py +827 -0
  30. megadetector/classification/train_classifier_tf.py +725 -0
  31. megadetector/classification/train_utils.py +323 -0
  32. megadetector/data_management/__init__.py +0 -0
  33. megadetector/data_management/animl_to_md.py +161 -0
  34. megadetector/data_management/annotations/__init__.py +0 -0
  35. megadetector/data_management/annotations/annotation_constants.py +33 -0
  36. megadetector/data_management/camtrap_dp_to_coco.py +270 -0
  37. megadetector/data_management/cct_json_utils.py +566 -0
  38. megadetector/data_management/cct_to_md.py +184 -0
  39. megadetector/data_management/cct_to_wi.py +293 -0
  40. megadetector/data_management/coco_to_labelme.py +284 -0
  41. megadetector/data_management/coco_to_yolo.py +701 -0
  42. megadetector/data_management/databases/__init__.py +0 -0
  43. megadetector/data_management/databases/add_width_and_height_to_db.py +107 -0
  44. megadetector/data_management/databases/combine_coco_camera_traps_files.py +210 -0
  45. megadetector/data_management/databases/integrity_check_json_db.py +563 -0
  46. megadetector/data_management/databases/subset_json_db.py +195 -0
  47. megadetector/data_management/generate_crops_from_cct.py +200 -0
  48. megadetector/data_management/get_image_sizes.py +164 -0
  49. megadetector/data_management/labelme_to_coco.py +559 -0
  50. megadetector/data_management/labelme_to_yolo.py +349 -0
  51. megadetector/data_management/lila/__init__.py +0 -0
  52. megadetector/data_management/lila/create_lila_blank_set.py +556 -0
  53. megadetector/data_management/lila/create_lila_test_set.py +192 -0
  54. megadetector/data_management/lila/create_links_to_md_results_files.py +106 -0
  55. megadetector/data_management/lila/download_lila_subset.py +182 -0
  56. megadetector/data_management/lila/generate_lila_per_image_labels.py +777 -0
  57. megadetector/data_management/lila/get_lila_annotation_counts.py +174 -0
  58. megadetector/data_management/lila/get_lila_image_counts.py +112 -0
  59. megadetector/data_management/lila/lila_common.py +319 -0
  60. megadetector/data_management/lila/test_lila_metadata_urls.py +164 -0
  61. megadetector/data_management/mewc_to_md.py +344 -0
  62. megadetector/data_management/ocr_tools.py +873 -0
  63. megadetector/data_management/read_exif.py +964 -0
  64. megadetector/data_management/remap_coco_categories.py +195 -0
  65. megadetector/data_management/remove_exif.py +156 -0
  66. megadetector/data_management/rename_images.py +194 -0
  67. megadetector/data_management/resize_coco_dataset.py +665 -0
  68. megadetector/data_management/speciesnet_to_md.py +41 -0
  69. megadetector/data_management/wi_download_csv_to_coco.py +247 -0
  70. megadetector/data_management/yolo_output_to_md_output.py +594 -0
  71. megadetector/data_management/yolo_to_coco.py +984 -0
  72. megadetector/data_management/zamba_to_md.py +188 -0
  73. megadetector/detection/__init__.py +0 -0
  74. megadetector/detection/change_detection.py +840 -0
  75. megadetector/detection/process_video.py +479 -0
  76. megadetector/detection/pytorch_detector.py +1451 -0
  77. megadetector/detection/run_detector.py +1267 -0
  78. megadetector/detection/run_detector_batch.py +2172 -0
  79. megadetector/detection/run_inference_with_yolov5_val.py +1314 -0
  80. megadetector/detection/run_md_and_speciesnet.py +1604 -0
  81. megadetector/detection/run_tiled_inference.py +1044 -0
  82. megadetector/detection/tf_detector.py +209 -0
  83. megadetector/detection/video_utils.py +1379 -0
  84. megadetector/postprocessing/__init__.py +0 -0
  85. megadetector/postprocessing/add_max_conf.py +72 -0
  86. megadetector/postprocessing/categorize_detections_by_size.py +166 -0
  87. megadetector/postprocessing/classification_postprocessing.py +1943 -0
  88. megadetector/postprocessing/combine_batch_outputs.py +249 -0
  89. megadetector/postprocessing/compare_batch_results.py +2110 -0
  90. megadetector/postprocessing/convert_output_format.py +403 -0
  91. megadetector/postprocessing/create_crop_folder.py +629 -0
  92. megadetector/postprocessing/detector_calibration.py +570 -0
  93. megadetector/postprocessing/generate_csv_report.py +522 -0
  94. megadetector/postprocessing/load_api_results.py +223 -0
  95. megadetector/postprocessing/md_to_coco.py +428 -0
  96. megadetector/postprocessing/md_to_labelme.py +351 -0
  97. megadetector/postprocessing/md_to_wi.py +41 -0
  98. megadetector/postprocessing/merge_detections.py +392 -0
  99. megadetector/postprocessing/postprocess_batch_results.py +2140 -0
  100. megadetector/postprocessing/remap_detection_categories.py +226 -0
  101. megadetector/postprocessing/render_detection_confusion_matrix.py +677 -0
  102. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +206 -0
  103. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +82 -0
  104. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +1665 -0
  105. megadetector/postprocessing/separate_detections_into_folders.py +795 -0
  106. megadetector/postprocessing/subset_json_detector_output.py +964 -0
  107. megadetector/postprocessing/top_folders_to_bottom.py +238 -0
  108. megadetector/postprocessing/validate_batch_results.py +332 -0
  109. megadetector/taxonomy_mapping/__init__.py +0 -0
  110. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +491 -0
  111. megadetector/taxonomy_mapping/map_new_lila_datasets.py +211 -0
  112. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +165 -0
  113. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +543 -0
  114. megadetector/taxonomy_mapping/retrieve_sample_image.py +71 -0
  115. megadetector/taxonomy_mapping/simple_image_download.py +231 -0
  116. megadetector/taxonomy_mapping/species_lookup.py +1008 -0
  117. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +159 -0
  118. megadetector/taxonomy_mapping/taxonomy_graph.py +346 -0
  119. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +83 -0
  120. megadetector/tests/__init__.py +0 -0
  121. megadetector/tests/test_nms_synthetic.py +335 -0
  122. megadetector/utils/__init__.py +0 -0
  123. megadetector/utils/ct_utils.py +1857 -0
  124. megadetector/utils/directory_listing.py +199 -0
  125. megadetector/utils/extract_frames_from_video.py +307 -0
  126. megadetector/utils/gpu_test.py +125 -0
  127. megadetector/utils/md_tests.py +2072 -0
  128. megadetector/utils/path_utils.py +2872 -0
  129. megadetector/utils/process_utils.py +172 -0
  130. megadetector/utils/split_locations_into_train_val.py +237 -0
  131. megadetector/utils/string_utils.py +234 -0
  132. megadetector/utils/url_utils.py +825 -0
  133. megadetector/utils/wi_platform_utils.py +968 -0
  134. megadetector/utils/wi_taxonomy_utils.py +1766 -0
  135. megadetector/utils/write_html_image_list.py +239 -0
  136. megadetector/visualization/__init__.py +0 -0
  137. megadetector/visualization/plot_utils.py +309 -0
  138. megadetector/visualization/render_images_with_thumbnails.py +243 -0
  139. megadetector/visualization/visualization_utils.py +1973 -0
  140. megadetector/visualization/visualize_db.py +630 -0
  141. megadetector/visualization/visualize_detector_output.py +498 -0
  142. megadetector/visualization/visualize_video_output.py +705 -0
  143. megadetector-10.0.15.dist-info/METADATA +115 -0
  144. megadetector-10.0.15.dist-info/RECORD +147 -0
  145. megadetector-10.0.15.dist-info/WHEEL +5 -0
  146. megadetector-10.0.15.dist-info/licenses/LICENSE +19 -0
  147. megadetector-10.0.15.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1973 @@
1
+ """
2
+
3
+ visualization_utils.py
4
+
5
+ Rendering functions shared across visualization scripts
6
+
7
+ """
8
+
9
+ #%% Constants and imports
10
+
11
+ import time
12
+ import numpy as np
13
+ import requests
14
+ import os
15
+ import cv2
16
+
17
+ from io import BytesIO
18
+ from PIL import Image, ImageFile, ImageFont, ImageDraw, ImageFilter
19
+ from PIL.ExifTags import TAGS
20
+ from multiprocessing.pool import ThreadPool
21
+ from multiprocessing.pool import Pool
22
+ from tqdm import tqdm
23
+ from functools import partial
24
+
25
+ from megadetector.utils.path_utils import find_images
26
+ from megadetector.data_management.annotations import annotation_constants
27
+ from megadetector.data_management.annotations.annotation_constants import \
28
+ detector_bbox_category_id_to_name
29
+ from megadetector.utils.ct_utils import sort_list_of_dicts_by_key
30
+
31
+ ImageFile.LOAD_TRUNCATED_IMAGES = True
32
+
33
+ # Maps EXIF standard rotation identifiers to degrees. The value "1" indicates no
34
+ # rotation; this will be ignored. The values 2, 4, 5, and 7 are mirrored rotations,
35
+ # which are not supported (we'll assert() on this when we apply rotations).
36
+ EXIF_IMAGE_NO_ROTATION = 1
37
+ EXIF_IMAGE_ROTATIONS = {
38
+ 3: 180,
39
+ 6: 270,
40
+ 8: 90
41
+ }
42
+
43
+ TEXTALIGN_LEFT = 0
44
+ TEXTALIGN_RIGHT = 1
45
+ TEXTALIGN_CENTER = 2
46
+
47
+ VTEXTALIGN_TOP = 0
48
+ VTEXTALIGN_BOTTOM = 1
49
+
50
+ # Convert category ID from int to str
51
+ DEFAULT_DETECTOR_LABEL_MAP = {
52
+ str(k): v for k, v in detector_bbox_category_id_to_name.items()
53
+ }
54
+
55
+ # Constants controlling retry behavior when fetching images from URLs
56
+ n_retries = 10
57
+ retry_sleep_time = 0.01
58
+
59
+ # If we try to open an image from a URL, and we encounter any error in this list,
60
+ # we'll retry, otherwise it's just an error.
61
+ error_names_for_retry = ['ConnectionError']
62
+
63
+ DEFAULT_BOX_THICKNESS = 4
64
+ DEFAULT_LABEL_FONT_SIZE = 16
65
+
66
+ # Default color map for mapping integer category IDs to colors when rendering bounding
67
+ # boxes
68
+ DEFAULT_COLORS = [
69
+ 'AliceBlue', 'Red', 'RoyalBlue', 'Gold', 'Chartreuse', 'Aqua', 'Azure',
70
+ 'Beige', 'Bisque', 'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue',
71
+ 'AntiqueWhite', 'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson',
72
+ 'Cyan', 'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange',
73
+ 'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet',
74
+ 'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite',
75
+ 'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'GoldenRod',
76
+ 'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 'Ivory', 'Khaki',
77
+ 'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 'LightBlue',
78
+ 'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey',
79
+ 'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue',
80
+ 'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime',
81
+ 'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid',
82
+ 'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen',
83
+ 'MediumTurquoise', 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin',
84
+ 'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed',
85
+ 'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed',
86
+ 'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple',
87
+ 'RosyBrown', 'Aquamarine', 'SaddleBrown', 'Green', 'SandyBrown',
88
+ 'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue',
89
+ 'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue', 'GreenYellow',
90
+ 'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', 'Wheat', 'White',
91
+ 'WhiteSmoke', 'Yellow', 'YellowGreen'
92
+ ]
93
+
94
+ pil_tag_name_to_id = {v: k for k, v in TAGS.items()}
95
+
96
+
97
+ #%% Functions
98
+
99
def open_image(input_file, ignore_exif_rotation=False):
    """
    Opens an image in binary format using PIL.Image and converts to RGB mode.

    Supports local files or URLs.

    This operation is lazy; image will not be actually loaded until the first
    operation that needs to load it (for example, resizing), so file opening
    errors can show up later. load_image() is the non-lazy version of this function.

    Args:
        input_file (str or BytesIO): can be a path to an image file (anything
            that PIL can open), a URL, or an image as a stream of bytes
        ignore_exif_rotation (bool, optional): don't rotate the loaded pixels,
            even if we are loading a JPEG and that JPEG says it should be rotated

    Returns:
        PIL.Image.Image: A PIL Image object in RGB mode

    Raises:
        AttributeError: if the image uses a mode other than RGBA/RGB/L/I;16
    """

    # URLs are fetched with requests (with a bounded retry loop for transient
    # connection errors); everything else is handed directly to PIL.
    if (isinstance(input_file, str)
            and input_file.startswith(('http://', 'https://'))):
        try:
            response = requests.get(input_file)
        except Exception as e:
            print(f'Error retrieving image {input_file}: {e}')
            success = False
            # Only retry for error types listed in the module-level
            # error_names_for_retry list; anything else is re-raised below
            if e.__class__.__name__ in error_names_for_retry:
                for i_retry in range(0,n_retries):
                    try:
                        time.sleep(retry_sleep_time)
                        response = requests.get(input_file)
                    except Exception as e:
                        print(f'Error retrieving image {input_file} on retry {i_retry}: {e}')
                        continue
                    # We only reach this point if the retry's requests.get() succeeded
                    print('Succeeded on retry {}'.format(i_retry))
                    success = True
                    break
            if not success:
                # Bare raise re-raises the exception being handled by this
                # (outer) except block
                raise
        try:
            image = Image.open(BytesIO(response.content))
        except Exception as e:
            print(f'Error opening image {input_file}: {e}')
            raise
    else:
        # Local path or byte stream; PIL defers actual decoding (lazy open)
        image = Image.open(input_file)

    # Convert to RGB if necessary
    if image.mode not in ('RGBA', 'RGB', 'L', 'I;16'):
        raise AttributeError(
            f'Image {input_file} uses unsupported mode {image.mode}')
    if image.mode == 'RGBA' or image.mode == 'L':
        # PIL.Image.convert() returns a converted copy of this image
        image = image.convert(mode='RGB')

    if not ignore_exif_rotation:
        # Alter orientation as needed according to EXIF tag 0x112 (274) for Orientation
        #
        # https://gist.github.com/dangtrinhnt/a577ece4cbe5364aad28
        # https://www.media.mit.edu/pia/Research/deepview/exif.html
        #
        try:
            exif = image._getexif()
            # May be None if the Orientation tag is absent; only the unmirrored
            # rotations in EXIF_IMAGE_ROTATIONS are supported (mirrored
            # orientations trigger the assert below)
            orientation: int = exif.get(274, None)
            if (orientation is not None) and (orientation != EXIF_IMAGE_NO_ROTATION):
                assert orientation in EXIF_IMAGE_ROTATIONS, \
                    'Mirrored rotations are not supported'
                image = image.rotate(EXIF_IMAGE_ROTATIONS[orientation], expand=True)
        except Exception:
            # Best-effort: images without EXIF (or with unreadable EXIF) are
            # returned unrotated
            pass

    return image

# ...def open_image(...)
175
+
176
+
177
def _remove_exif_tags(pil_image, tags_to_remove):
    """
    Strip the EXIF tags named in [tags_to_remove] from [pil_image]'s EXIF
    block, returning the (possibly modified) EXIF object.
    """

    exif_data = pil_image.getexif()
    if exif_data is None:
        return exif_data

    for name in tags_to_remove:
        # Tag names we don't recognize are silently ignored
        tag_id = pil_tag_name_to_id.get(name)
        if tag_id is not None:
            exif_data.pop(tag_id, None)

    return exif_data

# ..._remove_exif_tags
190
+
191
+
192
def exif_preserving_save(pil_image,
                         output_file,
                         quality='keep',
                         default_quality=85,
                         verbose=False,
                         tags_to_exclude=None):
    """
    Saves [pil_image] to [output_file], making a moderate attempt to preserve EXIF
    data and JPEG quality. Neither is guaranteed.

    Also see:

    https://discuss.dizzycoding.com/determining-jpg-quality-in-python-pil/

    ...for more ways to preserve jpeg quality if quality='keep' doesn't do the trick.

    Args:
        pil_image (Image): the PIL Image object to save
        output_file (str): the destination file
        quality (str or int, optional): can be "keep" (default), or an integer from 0 to 100.
            This is only used if PIL thinks the the source image is a JPEG. If you load a JPEG
            and resize it in memory, for example, it's no longer a JPEG.
        default_quality (int, optional): determines output quality when quality == 'keep' and we are
            saving a non-JPEG source to a JPEG file
        verbose (bool, optional): enable additional debug console output
        tags_to_exclude (list, optional): tags to exclude from the output file
    """

    # Grab the EXIF block we'll attach to the output file
    exif = pil_image.getexif()

    # Allow callers to pass a single tag name
    if isinstance(tags_to_exclude,str):
        tags_to_exclude = [tags_to_exclude]

    # Optionally remove some tags
    if (exif is not None) and (tags_to_exclude is not None):
        exif = _remove_exif_tags(pil_image,
                                 tags_to_remove=tags_to_exclude)

    # quality='keep' is only legal when PIL knows the source was a JPEG,
    # so fall back to a concrete quality value otherwise
    if (pil_image.format != "JPEG") and (quality == 'keep'):
        if verbose:
            print('Warning: quality "keep" passed when saving a non-JPEG source (during save to {})'.format(
                output_file))
        quality = default_quality

    # "None" is not a supported value for the "exif" keyword, so only pass
    # it when we actually have EXIF data
    save_kwargs = {}
    if exif is not None:
        save_kwargs['exif'] = exif

    # Some output formats don't support the quality parameter, so we try once
    # with it, and once without.
    try:
        pil_image.save(output_file, quality=quality, **save_kwargs)
    except Exception:
        if verbose:
            print('Warning: failed to write {}, trying again without quality parameter'.format(output_file))
        pil_image.save(output_file, **save_kwargs)

# ...def exif_preserving_save(...)
261
+
262
+
263
def load_image(input_file, ignore_exif_rotation=False):
    """
    Loads an image file, forcing full decoding before returning; this is the
    eager counterpart to open_image().

    Args:
        input_file (str or BytesIO): can be a path to an image file (anything
            that PIL can open), a URL, or an image as a stream of bytes
        ignore_exif_rotation (bool, optional): don't rotate the loaded pixels,
            even if we are loading a JPEG and that JPEG says it should be rotated

    Returns:
        PIL.Image.Image: a PIL Image object in RGB mode
    """

    img = open_image(input_file, ignore_exif_rotation=ignore_exif_rotation)

    # Force pixel decoding now, so I/O errors surface here rather than at the
    # first pixel operation
    img.load()

    return img
281
+
282
+
283
def resize_image(image,
                 target_width=-1,
                 target_height=-1,
                 output_file=None,
                 no_enlarge_width=False,
                 verbose=False,
                 quality='keep'):
    """
    Resizes a PIL Image object to the specified width and height; does not resize
    in place. If either width or height are -1, resizes with aspect ratio preservation.

    If target_width and target_height are both -1, does not modify the image, but
    will write to output_file if supplied.

    If no resizing is required, and an Image object is supplied, returns the original Image
    object (i.e., does not copy).

    Args:
        image (Image or str): PIL Image object or a filename (local file or URL)
        target_width (int, optional): width to which we should resize this image, or -1
            to let target_height determine the size
        target_height (int, optional): height to which we should resize this image, or -1
            to let target_width determine the size
        output_file (str, optional): file to which we should save this image; if None,
            just returns the image without saving
        no_enlarge_width (bool, optional): if [no_enlarge_width] is True, and
            [target width] is larger than the original image width, does not modify the image,
            but will write to output_file if supplied
        verbose (bool, optional): enable additional debug output
        quality (str or int, optional): passed to exif_preserving_save, see docs for more detail

    Returns:
        PIL.Image.Image: the resized image, which may be the original image if no resizing is
        required
    """

    image_fn = 'in_memory'
    if isinstance(image,str):
        image_fn = image
        image = load_image(image)

    # Treat None the same as -1 ("let the other dimension decide")
    if target_width is None:
        target_width = -1

    if target_height is None:
        target_height = -1

    resize_required = True

    # No resize was requested, this is always a no-op
    if target_width == -1 and target_height == -1:

        resize_required = False

    # Does either dimension need to scale according to the other?
    elif target_width == -1 or target_height == -1:

        # Aspect ratio as width over height
        # ar = w / h
        aspect_ratio = image.size[0] / image.size[1]

        if target_width != -1:
            # h = w / ar
            target_height = int(target_width / aspect_ratio)
        else:
            # w = ar * h
            target_width = int(aspect_ratio * target_height)

    # If we're not enlarging images and this would be an enlarge operation
    if (no_enlarge_width) and (target_width > image.size[0]):

        if verbose:
            print('Bypassing image enlarge for {} --> {}'.format(
                image_fn,str(output_file)))
        resize_required = False

    # If the target size is the same as the original size
    if (target_width == image.size[0]) and (target_height == image.size[1]):

        resize_required = False

    if not resize_required:

        if output_file is not None:
            if verbose:
                print('No resize required for resize {} --> {}'.format(
                    image_fn,str(output_file)))
            exif_preserving_save(image,output_file,quality=quality,verbose=verbose)
        return image

    assert target_width > 0 and target_height > 0, \
        'Invalid image resize target {},{}'.format(target_width,target_height)

    # Image.ANTIALIAS was removed in Pillow 10 (it was an alias for LANCZOS in
    # earlier versions).  Catch only AttributeError here -- the specific failure
    # caused by the removed attribute -- so that genuine resize errors (e.g.
    # decoder failures) are not masked by a second, equally doomed resize
    # attempt.
    try:
        resized_image = image.resize((target_width, target_height), Image.ANTIALIAS)
    except AttributeError:
        resized_image = image.resize((target_width, target_height), Image.Resampling.LANCZOS)

    if output_file is not None:
        exif_preserving_save(resized_image,output_file,quality=quality,verbose=verbose)

    return resized_image

# ...def resize_image(...)
389
+
390
+
391
def crop_image(detections, image, confidence_threshold=0.15, expansion=0):
    """
    Crops detections above [confidence_threshold] from the PIL image [image],
    returning a list of PIL Images, preserving the order of [detections].

    Args:
        detections (list): a list of dictionaries with keys 'conf' and 'bbox';
            boxes are length-four arrays formatted as [x,y,w,h], normalized,
            upper-left origin (this is the standard MD detection format)
        image (Image or str): the PIL Image object from which we should crop detections,
            or an image filename
        confidence_threshold (float, optional): only crop detections above this threshold;
            pass None to crop all detections
        expansion (int, optional): a number of pixels to include on each side of a cropped
            detection

    Returns:
        list: a possibly-empty list of PIL Image objects
    """

    ret_images = []

    if isinstance(image,str):
        image = load_image(image)

    for detection in detections:

        score = float(detection['conf'])

        if (confidence_threshold is None) or (score >= confidence_threshold):

            x1, y1, w_box, h_box = detection['bbox']
            ymin,xmin,ymax,xmax = y1, x1, y1 + h_box, x1 + w_box

            # Convert to pixels so we can use the PIL crop() function
            im_width, im_height = image.size
            (left, right, top, bottom) = (xmin * im_width, xmax * im_width,
                                          ymin * im_height, ymax * im_height)

            if expansion > 0:
                left -= expansion
                right += expansion
                top -= expansion
                bottom += expansion

            # PIL's crop() does surprising things if you provide values outside of
            # the image, clip inputs.
            #
            # Note that crop()'s right/bottom coordinates are *exclusive*, so they
            # may legally equal the image width/height; clamping them to
            # (size - 1) would silently drop the last row/column of pixels for
            # full-extent boxes.
            left = max(left,0); right = max(right,0)
            top = max(top,0); bottom = max(bottom,0)

            left = min(left,im_width-1); right = min(right,im_width)
            top = min(top,im_height-1); bottom = min(bottom,im_height)

            ret_images.append(image.crop((left, top, right, bottom)))

        # ...if this detection is above threshold

    # ...for each detection

    return ret_images

# ...def crop_image(...)
452
+
453
+
454
def blur_detections(image,detections,blur_radius=40):
    """
    Applies a Gaussian blur to every detection region in [image]; [image] is
    modified in place.

    Args:
        image (PIL.Image.Image): image in which we should blur specific regions
        detections (list): list of detections in the MD output format
            (normalized [x,y,w,h] boxes); see render_detection_bounding_boxes
            for more detail.
        blur_radius (int, optional): radius of blur kernel in pixels
    """

    im_w, im_h = image.size

    for det in detections:

        x_norm, y_norm, w_norm, h_norm = det['bbox']

        # Normalized box --> absolute pixel coordinates
        x0 = int(x_norm * im_w)
        y0 = int(y_norm * im_h)
        box_w = int(w_norm * im_w)
        box_h = int(h_norm * im_h)

        # Clip the box to the image bounds
        left = max(0, x0)
        top = max(0, y0)
        right = min(im_w, x0 + box_w)
        bottom = min(im_h, y0 + box_h)

        # Blur just this region, then paste it back over the original pixels
        region = image.crop((left, top, right, bottom))
        blurred = region.filter(ImageFilter.GaussianBlur(radius=blur_radius))
        image.paste(blurred, (left, top))

    # ...for each detection

# ...def blur_detections(...)
493
+
494
+
495
+ def render_detection_bounding_boxes(detections,
496
+ image,
497
+ label_map='show_categories',
498
+ classification_label_map=None,
499
+ confidence_threshold=0.0,
500
+ thickness=DEFAULT_BOX_THICKNESS,
501
+ expansion=0,
502
+ classification_confidence_threshold=0.3,
503
+ max_classifications=3,
504
+ colormap=None,
505
+ textalign=TEXTALIGN_LEFT,
506
+ vtextalign=VTEXTALIGN_TOP,
507
+ label_font_size=DEFAULT_LABEL_FONT_SIZE,
508
+ custom_strings=None,
509
+ box_sort_order='confidence',
510
+ verbose=False):
511
+ """
512
+ Renders bounding boxes (with labels and confidence values) on an image for all
513
+ detections above a threshold.
514
+
515
+ Renders classification labels if present.
516
+
517
+ [image] is modified in place.
518
+
519
+ Args:
520
+ detections (list): list of detections in the MD output format, for example:
521
+
522
+ .. code-block::none
523
+
524
+ [
525
+ {
526
+ "category": "2",
527
+ "conf": 0.996,
528
+ "bbox": [
529
+ 0.0,
530
+ 0.2762,
531
+ 0.1234,
532
+ 0.2458
533
+ ]
534
+ }
535
+ ]
536
+
537
+ ...where the bbox coordinates are [x, y, box_width, box_height].
538
+
539
+ (0, 0) is the upper-left. Coordinates are normalized.
540
+
541
+ Supports classification results, in the standard format:
542
+
543
+ .. code-block::none
544
+
545
+ [
546
+ {
547
+ "category": "2",
548
+ "conf": 0.996,
549
+ "bbox": [
550
+ 0.0,
551
+ 0.2762,
552
+ 0.1234,
553
+ 0.2458
554
+ ]
555
+ "classifications": [
556
+ ["3", 0.901],
557
+ ["1", 0.071],
558
+ ["4", 0.025]
559
+ ]
560
+ }
561
+ ]
562
+
563
+ image (PIL.Image.Image): image on which we should render detections
564
+ label_map (dict, optional): optional, mapping the numeric label to a string name. The type of the
565
+ numeric label (typically strings) needs to be consistent with the keys in label_map; no casting is
566
+ carried out. If [label_map] is None, no labels are shown (not even numbers and confidence values).
567
+ If you want category numbers and confidence values without class labels, use the default value,
568
+ the string 'show_categories'.
569
+ classification_label_map (dict, optional): optional, mapping of the string class labels to the actual
570
+ class names. The type of the numeric label (typically strings) needs to be consistent with the keys
571
+ in label_map; no casting is carried out. If [label_map] is None, no labels are shown (not even numbers
572
+ and confidence values).
573
+ confidence_threshold (float or dict, optional): threshold above which boxes are rendered. Can also be a
574
+ dictionary mapping category IDs to thresholds.
575
+ thickness (int, optional): line thickness in pixels
576
+ expansion (int, optional): number of pixels to expand bounding boxes on each side
577
+ classification_confidence_threshold (float, optional): confidence above which classification results
578
+ are displayed
579
+ max_classifications (int, optional): maximum number of classification results rendered for one image
580
+ colormap (list, optional): list of color names, used to choose colors for categories by
581
+ indexing with the values in [classes]; defaults to a reasonable set of colors
582
+ textalign (int, optional): TEXTALIGN_LEFT, TEXTALIGN_CENTER, or TEXTALIGN_RIGHT
583
+ vtextalign (int, optional): VTEXTALIGN_TOP or VTEXTALIGN_BOTTOM
584
+ label_font_size (float, optional): font size for labels
585
+ custom_strings (list of str, optional): optional set of strings to append to detection labels, should
586
+ have the same length as [detections]. Appended before any classification labels.
587
+ box_sort_order (str, optional): sorting scheme for detection boxes, can be None, "confidence", or
588
+ "reverse_confidence". "confidence" puts the highest-confidence boxes on top.
589
+ verbose (bool, optional): enable additional debug output
590
+ """
591
+
592
+ # Input validation
593
+ if (label_map is not None) and (isinstance(label_map,str)) and (label_map == 'show_categories'):
594
+ label_map = {}
595
+
596
+ if custom_strings is not None:
597
+ assert len(custom_strings) == len(detections), \
598
+ '{} custom strings provided for {} detections'.format(
599
+ len(custom_strings),len(detections))
600
+
601
+ display_boxes = []
602
+
603
+ # list of lists, one list of strings for each bounding box (to accommodate multiple labels)
604
+ display_strs = []
605
+
606
+ # for color selection
607
+ classes = []
608
+
609
+ if box_sort_order is not None:
610
+
611
+ if box_sort_order == 'confidence':
612
+ detections = sort_list_of_dicts_by_key(detections,k='conf',reverse=False)
613
+ elif box_sort_order == 'reverse_confidence':
614
+ detections = sort_list_of_dicts_by_key(detections,k='conf',reverse=True)
615
+ else:
616
+ raise ValueError('Unrecognized sorting scheme {}'.format(box_sort_order))
617
+
618
+ for i_detection,detection in enumerate(detections):
619
+
620
+ score = detection['conf']
621
+
622
+ if isinstance(confidence_threshold,dict):
623
+ rendering_threshold = confidence_threshold[detection['category']]
624
+ else:
625
+ rendering_threshold = confidence_threshold
626
+
627
+ # Always render objects with a confidence of "None", this is typically used
628
+ # for ground truth data.
629
+ if (score is None) or (rendering_threshold is None) or (score >= rendering_threshold):
630
+
631
+ x1, y1, w_box, h_box = detection['bbox']
632
+ display_boxes.append([y1, x1, y1 + h_box, x1 + w_box])
633
+
634
+ # The class index to use for coloring this box, which may be based on the detection
635
+ # category or on the most confident classification category.
636
+ clss = detection['category']
637
+
638
+ # This will be a list of strings that should be rendered above/below this box
639
+ displayed_label = []
640
+
641
+ if label_map is not None:
642
+ label = label_map[clss] if clss in label_map else clss
643
+ if score is not None:
644
+ displayed_label = ['{}: {}%'.format(label, round(100 * score))]
645
+ else:
646
+ displayed_label = ['{}'.format(label)]
647
+ else:
648
+ displayed_label = ['']
649
+
650
+ if custom_strings is not None:
651
+ custom_string = custom_strings[i_detection]
652
+ if custom_string is not None and len(custom_string) > 0:
653
+ assert len(displayed_label) == 1
654
+ displayed_label[0] += ' ' + custom_string
655
+
656
+ if ('classifications' in detection) and len(detection['classifications']) > 0:
657
+
658
+ classifications = detection['classifications']
659
+
660
+ if len(classifications) > max_classifications:
661
+ classifications = classifications[0:max_classifications]
662
+
663
+ max_classification_category = 0
664
+ max_classification_conf = -100
665
+
666
+ for classification in classifications:
667
+
668
+ classification_conf = classification[1]
669
+ if classification_conf is None or \
670
+ classification_conf < classification_confidence_threshold:
671
+ continue
672
+
673
+ class_key = classification[0]
674
+
675
+ # Is this the most confident classification for this detection?
676
+ if classification_conf > max_classification_conf:
677
+ max_classification_conf = classification_conf
678
+ max_classification_category = int(class_key)
679
+
680
+ if (classification_label_map is not None) and (class_key in classification_label_map):
681
+ class_name = classification_label_map[class_key]
682
+ else:
683
+ class_name = class_key
684
+ if classification_conf is not None:
685
+ displayed_label += ['{}: {:5.1%}'.format(class_name.lower(), classification_conf)]
686
+ else:
687
+ displayed_label += ['{}'.format(class_name.lower())]
688
+
689
+ # ...for each classification
690
+
691
+ # To avoid duplicate colors with detection-only visualization, offset
692
+ # the classification class index by the number of detection classes
693
+ clss = annotation_constants.NUM_DETECTOR_CATEGORIES + max_classification_category
694
+
695
+ # ...if we have classification results
696
+
697
+ # display_strs is a list of labels for each box
698
+ display_strs.append(displayed_label)
699
+ classes.append(clss)
700
+
701
+ # ...if the confidence of this detection is above threshold
702
+
703
+ # ...for each detection
704
+
705
+ display_boxes = np.array(display_boxes)
706
+
707
+ if verbose:
708
+ print('Rendering {} of {} detections'.format(len(display_boxes),len(detections)))
709
+
710
+ draw_bounding_boxes_on_image(image,
711
+ display_boxes,
712
+ classes,
713
+ display_strs=display_strs,
714
+ thickness=thickness,
715
+ expansion=expansion,
716
+ colormap=colormap,
717
+ textalign=textalign,
718
+ vtextalign=vtextalign,
719
+ label_font_size=label_font_size)
720
+
721
+ # ...render_detection_bounding_boxes(...)
722
+
723
+
724
def draw_bounding_boxes_on_image(image,
                                 boxes,
                                 classes,
                                 thickness=DEFAULT_BOX_THICKNESS,
                                 expansion=0,
                                 display_strs=None,
                                 colormap=None,
                                 textalign=TEXTALIGN_LEFT,
                                 vtextalign=VTEXTALIGN_TOP,
                                 text_rotation=None,
                                 label_font_size=DEFAULT_LABEL_FONT_SIZE):
    """
    Draws bounding boxes on an image, modifying the image in place.

    Args:
        image (PIL.Image): the image on which we should draw boxes
        boxes (np.array): a two-dimensional numpy array of size [N, 4], where N is the
            number of boxes, and each row is (ymin, xmin, ymax, xmax). Coordinates should
            be normalized to image height/width.
        classes (list): ints (or string-formatted ints) giving the class label of each
            box; used only for color selection. Should have the same length as [boxes].
        thickness (int, optional): line thickness in pixels
        expansion (int, optional): number of pixels to expand bounding boxes on each side
        display_strs (list, optional): list of lists of strings (outer list should have
            the same length as [boxes]); typically used to show (possibly multiple)
            detection or classification categories and/or confidence values
        colormap (list, optional): list of color names, used to choose colors for
            categories by indexing with the values in [classes]; defaults to a
            reasonable set of colors
        textalign (int, optional): TEXTALIGN_LEFT, TEXTALIGN_CENTER, or TEXTALIGN_RIGHT
        vtextalign (int, optional): VTEXTALIGN_TOP or VTEXTALIGN_BOTTOM
        text_rotation (float, optional): rotation to apply to text
        label_font_size (float, optional): font size for labels
    """

    shape = boxes.shape

    # Quietly do nothing unless [boxes] is an [N,4] array
    if (not shape) or (len(shape) != 2) or (shape[1] != 4):
        return

    for i_box in range(shape[0]):

        # Labels for this box, if the caller supplied any
        box_labels = display_strs[i_box] if display_strs else None

        draw_bounding_box_on_image(image,
                                   boxes[i_box, 0],
                                   boxes[i_box, 1],
                                   boxes[i_box, 2],
                                   boxes[i_box, 3],
                                   classes[i_box],
                                   thickness=thickness,
                                   expansion=expansion,
                                   display_str_list=box_labels,
                                   colormap=colormap,
                                   textalign=textalign,
                                   vtextalign=vtextalign,
                                   text_rotation=text_rotation,
                                   label_font_size=label_font_size)

    # ...for each box

# ...draw_bounding_boxes_on_image(...)
780
+
781
+
782
def get_text_size(font,s):
    """
    Get the expected width and height when rendering the string [s] in the font
    [font].

    Args:
        font (PIL.ImageFont): the font whose size we should query
        s (str): the string whose size we should query

    Returns:
        tuple: (w,h), both floats in pixel coordinates
    """

    # Pillow 9 exposed font.getsize(s) directly.  With Pillow 10, we derive the
    # size from getbbox().  Using (right,bottom) rather than
    # (right-left,bottom-top) produces results most similar to the old
    # getsize() behavior, so that's what we do here, falling back to getsize()
    # for older Pillow versions.
    try:
        _, _, right, bottom = font.getbbox(s)  # noqa
        return right, bottom
    except Exception:
        return font.getsize(s)
810
+
811
+
812
def draw_bounding_box_on_image(image,
                               ymin,
                               xmin,
                               ymax,
                               xmax,
                               clss=None,
                               thickness=DEFAULT_BOX_THICKNESS,
                               expansion=0,
                               display_str_list=None,
                               use_normalized_coordinates=True,
                               label_font_size=DEFAULT_LABEL_FONT_SIZE,
                               colormap=None,
                               textalign=TEXTALIGN_LEFT,
                               vtextalign=VTEXTALIGN_TOP,
                               text_rotation=None):
    """
    Adds a bounding box to an image. Modifies the image in place.

    Bounding box coordinates can be specified in either absolute (pixel) or
    normalized coordinates by setting the use_normalized_coordinates argument.

    Each string in display_str_list is displayed on a separate line above the
    bounding box in black text on a rectangle filled with the input 'color'.
    If the top of the bounding box extends to the edge of the image, the strings
    are displayed below the bounding box.

    Adapted from:

    https://github.com/tensorflow/models/blob/master/research/object_detection/utils/visualization_utils.py

    Args:
        image (PIL.Image.Image): the image on which we should draw a box
        ymin (float): ymin of bounding box
        xmin (float): xmin of bounding box
        ymax (float): ymax of bounding box
        xmax (float): xmax of bounding box
        clss (int, optional): the class index of the object in this bounding box, used for choosing
            a color; should be either an integer or a string-formatted integer
        thickness (int, optional): line thickness in pixels
        expansion (int, optional): number of pixels to expand bounding boxes on each side
        display_str_list (list, optional): list of strings to display above the box (each to be shown on its
            own line)
        use_normalized_coordinates (bool, optional): if True (default), treat coordinates
            ymin, xmin, ymax, xmax as relative to the image, otherwise coordinates as absolute pixel values
        label_font_size (float, optional): font size
        colormap (list, optional): list of color names, used to choose colors for categories by
            indexing with the values in [classes]; defaults to a reasonable set of colors
        textalign (int, optional): TEXTALIGN_LEFT, TEXTALIGN_CENTER, or TEXTALIGN_RIGHT
        vtextalign (int, optional): VTEXTALIGN_TOP or VTEXTALIGN_BOTTOM
        text_rotation (float, optional): rotation to apply to text; only None, 0,
            and -90 are supported (anything else triggers an assertion failure)
    """

    if colormap is None:
        colormap = DEFAULT_COLORS

    if display_str_list is None:
        display_str_list = []

    if clss is None:
        # Default to the MegaDetector animal class ID (1)
        color = colormap[1]
    else:
        # Wrap around the colormap if the class index exceeds its length
        color = colormap[int(clss) % len(colormap)]

    draw = ImageDraw.Draw(image)
    im_width, im_height = image.size
    if use_normalized_coordinates:
        (left, right, top, bottom) = (xmin * im_width, xmax * im_width,
                                      ymin * im_height, ymax * im_height)
    else:
        (left, right, top, bottom) = (xmin, xmax, ymin, ymax)

    if expansion > 0:

        left -= expansion
        right += expansion
        top -= expansion
        bottom += expansion

        # Deliberately trimming to the width of the image only in the case where
        # box expansion is turned on. There's not an obvious correct behavior here,
        # but the thinking is that if the caller provided an out-of-range bounding
        # box, they meant to do that, but at least in the eyes of the person writing
        # this comment, if you expand a box for visualization reasons, you don't want
        # to end up with part of a box.
        #
        # A slightly more sophisticated approach might check whether it was in fact the
        # expansion that made this box larger than the image, but this is the case
        # 99.999% of the time here, so that doesn't seem necessary.
        left = max(left,0); right = max(right,0)
        top = max(top,0); bottom = max(bottom,0)

        left = min(left,im_width-1); right = min(right,im_width-1)
        top = min(top,im_height-1); bottom = min(bottom,im_height-1)

    # ...if we need to expand boxes

    # Draw the box outline as a closed polyline (this gives us control over
    # line thickness via the [width] argument)
    draw.line([(left, top), (left, bottom), (right, bottom),
               (right, top), (left, top)], width=thickness, fill=color)

    # NOTE(review): display_str_list was defaulted to [] above, so this condition is
    # always True; an empty list just renders no text.
    if display_str_list is not None:

        # Fall back to PIL's built-in bitmap font if arial.ttf is unavailable
        try:
            font = ImageFont.truetype('arial.ttf', label_font_size)
        except OSError:
            font = ImageFont.load_default()

        display_str_heights = [get_text_size(font,ds)[1] for ds in display_str_list]

        # Each display_str has a top and bottom margin of 0.05x.
        total_display_str_height = (1 + 2 * 0.05) * sum(display_str_heights)

        # Reverse list and print from bottom to top
        for i_str,display_str in enumerate(display_str_list[::-1]):

            # Skip empty strings
            if len(display_str) == 0:
                continue

            text_width, text_height = get_text_size(font,display_str)
            margin = int(np.ceil(0.05 * text_height))

            if text_rotation is not None and text_rotation != 0:

                assert text_rotation == -90, \
                    'Only -90-degree text rotation is supported'

                # Render the text (with its background rectangle) into a small
                # temporary image, rotate that image, then paste it onto the
                # target image next to the box.
                image_tmp = Image.new('RGB',(text_width+2*margin,text_height+2*margin))
                image_tmp_draw = ImageDraw.Draw(image_tmp)
                image_tmp_draw.rectangle([0,0,text_width+2*margin,text_height+2*margin],fill=color)
                image_tmp_draw.text( (margin,margin), display_str, font=font, fill='black')
                rotated_text = image_tmp.rotate(text_rotation,expand=1)

                if textalign == TEXTALIGN_RIGHT:
                    text_left = right
                else:
                    text_left = left
                # Offset successive strings horizontally, since rotated text
                # stacks sideways rather than vertically
                text_left = int(text_left + (text_height) * i_str)

                if vtextalign == VTEXTALIGN_BOTTOM:
                    text_top = bottom - text_width
                else:
                    text_top = top
                # paste() requires integer coordinates
                text_left = int(text_left)
                text_top = int(text_top)

                image.paste(rotated_text,[text_left,text_top])

            else:

                # If the total height of the display strings added to the top of the bounding
                # box exceeds the top of the image, stack the strings below the bounding box
                # instead of above, and vice-versa if we're bottom-aligning.
                #
                # If the text just doesn't fit outside the box, we don't try anything fancy,
                # it will just appear outside the image.
                if vtextalign == VTEXTALIGN_TOP:
                    text_bottom = top
                    if (text_bottom - total_display_str_height) < 0:
                        text_bottom = bottom + total_display_str_height
                else:
                    assert vtextalign == VTEXTALIGN_BOTTOM, \
                        'Unrecognized vertical text alignment {}'.format(vtextalign)
                    text_bottom = bottom + total_display_str_height
                    if (text_bottom + total_display_str_height) > im_height:
                        text_bottom = top

                # Stack successive strings upward from the baseline position
                text_bottom = int(text_bottom) - i_str * (int(text_height + (2 * margin)))

                text_left = left

                if textalign == TEXTALIGN_RIGHT:
                    text_left = right - text_width
                elif textalign == TEXTALIGN_CENTER:
                    text_left = ((right + left) / 2.0) - (text_width / 2.0)
                text_left = int(text_left)

                # Filled background rectangle behind the text
                draw.rectangle(
                    [(text_left, (text_bottom - text_height) - (2 * margin)),
                     (text_left + text_width, text_bottom)],
                    fill=color)

                draw.text(
                    (text_left + margin, text_bottom - text_height - margin),
                    display_str,
                    fill='black',
                    font=font)

            # ...if we're rotating text

        # ...if we're rendering text

# ...def draw_bounding_box_on_image(...)
1005
+
1006
+
1007
def render_megadb_bounding_boxes(boxes_info, image):
    """
    Render bounding boxes to an image, where those boxes are in the mostly-deprecated
    MegaDB format, which looks like:

    .. code-block::none

        {
            "category": "animal",
            "bbox": [
                0.739,
                0.448,
                0.187,
                0.198
            ]
        }

    Args:
        boxes_info (list): list of dicts, each dict represents a single detection
            where bbox coordinates are normalized [x_min, y_min, width, height]
        image (PIL.Image.Image): image to modify

    :meta private:
    """

    category_name_to_id = annotation_constants.detector_bbox_category_name_to_id

    box_coords = []
    box_labels = []
    box_classes = []  # ints, for selecting colors

    for box_info in boxes_info:

        x_min, y_min, box_w, box_h = box_info['bbox']
        category_name = box_info['category']

        # Convert from [x,y,w,h] to [ymin,xmin,ymax,xmax]
        box_coords.append([y_min, x_min, y_min + box_h, x_min + box_w])
        box_labels.append([category_name])
        box_classes.append(category_name_to_id[category_name])

    draw_bounding_boxes_on_image(image,
                                 np.array(box_coords),
                                 box_classes,
                                 display_strs=box_labels)

# ...def render_megadb_bounding_boxes(...)
1048
+
1049
+
1050
def render_db_bounding_boxes(boxes,
                             classes,
                             image,
                             original_size=None,
                             label_map=None,
                             thickness=DEFAULT_BOX_THICKNESS,
                             expansion=0,
                             colormap=None,
                             textalign=TEXTALIGN_LEFT,
                             vtextalign=VTEXTALIGN_TOP,
                             text_rotation=None,
                             label_font_size=DEFAULT_LABEL_FONT_SIZE,
                             tags=None,
                             boxes_are_normalized=False):
    """
    Render bounding boxes (with class labels) on an image. This is a wrapper for
    draw_bounding_boxes_on_image, allowing the caller to operate on a resized image
    by providing the original size of the image; boxes will be scaled accordingly.

    This function assumes that bounding boxes are in absolute coordinates, typically
    because they come from COCO camera traps .json files, unless boxes_are_normalized
    is True.

    Args:
        boxes (list): list of length-4 tuples, formatted as (x,y,w,h) (in pixels)
        classes (list): list of ints (or string-formatted ints), used to choose labels
            (either by literally rendering the class labels, or by indexing into
            [label_map])
        image (PIL.Image.Image): image object to modify
        original_size (tuple, optional): if this is not None, and the size is different
            than the size of [image], we assume that [boxes] refer to the original size,
            and we scale them accordingly before rendering
        label_map (dict, optional): int --> str dictionary, typically mapping category IDs
            to species labels; if None, category labels are rendered verbatim (typically
            as numbers)
        thickness (int, optional): line width
        expansion (int, optional): a number of pixels to include on each side of a
            cropped detection
        colormap (list, optional): list of color names, used to choose colors for
            categories by indexing with the values in [classes]; defaults to a
            reasonable set of colors
        textalign (int, optional): TEXTALIGN_LEFT, TEXTALIGN_CENTER, or TEXTALIGN_RIGHT
        vtextalign (int, optional): VTEXTALIGN_TOP or VTEXTALIGN_BOTTOM
        text_rotation (float, optional): rotation to apply to text
        label_font_size (float, optional): font size for labels
        tags (list, optional): list of strings of length len(boxes) that should be
            appended after each class name (e.g. to show scores)
        boxes_are_normalized (bool, optional): whether boxes have already been normalized
    """

    # Boxes are interpreted relative to the original size if supplied, otherwise
    # relative to the image we were given
    if original_size is not None:
        img_width, img_height = original_size
    else:
        img_width, img_height = image.size

    box_coords = []
    box_labels = []

    for i_box, box in enumerate(boxes):

        clss = classes[i_box]
        x_abs, y_abs, w_abs, h_abs = box[0:4]

        # Normalize boxes if necessary
        if boxes_are_normalized:
            xmin = x_abs
            xmax = x_abs + w_abs
            ymin = y_abs
            ymax = y_abs + h_abs
        else:
            ymin = y_abs / img_height
            ymax = ymin + h_abs / img_height
            xmin = x_abs / img_width
            xmax = xmin + w_abs / img_width

        box_coords.append([ymin, xmin, ymax, xmax])

        if label_map:
            clss = label_map[int(clss)]

        display_str = str(clss)

        # Do we have a tag to append to the class string?
        if tags is not None and tags[i_box] is not None and len(tags[i_box]) > 0:
            display_str += ' ' + tags[i_box]

        # Need to be a string here because PIL needs to iterate through chars
        box_labels.append([display_str])

    # ...for each box

    draw_bounding_boxes_on_image(image,
                                 np.array(box_coords),
                                 classes,
                                 display_strs=box_labels,
                                 thickness=thickness,
                                 expansion=expansion,
                                 colormap=colormap,
                                 textalign=textalign,
                                 vtextalign=vtextalign,
                                 text_rotation=text_rotation,
                                 label_font_size=label_font_size)

# ...def render_db_bounding_boxes(...)
1161
+
1162
+
1163
def draw_bounding_boxes_on_file(input_file,
                                output_file,
                                detections,
                                confidence_threshold=0.0,
                                detector_label_map=DEFAULT_DETECTOR_LABEL_MAP,
                                thickness=DEFAULT_BOX_THICKNESS,
                                expansion=0,
                                colormap=None,
                                label_font_size=DEFAULT_LABEL_FONT_SIZE,
                                custom_strings=None,
                                target_size=None,
                                ignore_exif_rotation=False):
    """
    Renders detection bounding boxes on an image loaded from file, optionally writing
    the results to a new image file.

    Args:
        input_file (str): filename or URL to load
        output_file (str): filename to which we should write the rendered image
        detections (list): a list of dictionaries with keys 'conf', 'bbox', and
            'category'; boxes are length-four arrays formatted as [x,y,w,h], normalized,
            upper-left origin (this is the standard MD detection format). 'category' is
            a string-int.
        confidence_threshold (float, optional): only render detections with confidence
            above this threshold
        detector_label_map (dict, optional): a dict mapping category IDs to strings. If
            this is None, no confidence values or identifiers are shown. If this is {},
            just category indices and confidence values are shown.
        thickness (int, optional): line width in pixels for box rendering
        expansion (int, optional): box expansion in pixels
        colormap (list, optional): list of color names, used to choose colors for
            categories by indexing with the values in [classes]; defaults to a
            reasonable set of colors
        label_font_size (float, optional): label font size
        custom_strings (list, optional): set of strings to append to detection labels,
            should have the same length as [detections]. Appended before any
            classification labels.
        target_size (tuple, optional): tuple of (target_width,target_height). Either or
            both can be -1, see resize_image() for documentation. If None or (-1,-1),
            uses the original image size.
        ignore_exif_rotation (bool, optional): don't rotate the loaded pixels,
            even if we are loading a JPEG and that JPEG says it should be rotated.

    Returns:
        PIL.Image.Image: loaded and modified image
    """

    # Load (and possibly resize) the source image
    image = open_image(input_file, ignore_exif_rotation=ignore_exif_rotation)
    if target_size is not None:
        image = resize_image(image, target_size[0], target_size[1])

    # Render boxes in place
    render_detection_bounding_boxes(detections,
                                    image,
                                    label_map=detector_label_map,
                                    confidence_threshold=confidence_threshold,
                                    thickness=thickness,
                                    expansion=expansion,
                                    colormap=colormap,
                                    custom_strings=custom_strings,
                                    label_font_size=label_font_size)

    # Write the result out if the caller asked for that
    if output_file is not None:
        image.save(output_file)

    return image

# ...def draw_bounding_boxes_on_file(...)
1226
+
1227
+
1228
def draw_db_boxes_on_file(input_file,
                          output_file,
                          boxes,
                          classes=None,
                          label_map=None,
                          thickness=DEFAULT_BOX_THICKNESS,
                          expansion=0,
                          ignore_exif_rotation=False):
    """
    Render COCO-formatted bounding boxes (in absolute coordinates) on an image loaded
    from file, writing the results to a new image file.

    Args:
        input_file (str): image file to read
        output_file (str): image file to write
        boxes (list): list of length-4 tuples, formatted as (x,y,w,h) (in pixels)
        classes (list, optional): list of ints (or string-formatted ints), used to
            choose labels (either by literally rendering the class labels, or by
            indexing into [label_map])
        label_map (dict, optional): int --> str dictionary, typically mapping category
            IDs to species labels; if None, category labels are rendered verbatim
            (typically as numbers)
        thickness (int, optional): line width
        expansion (int, optional): a number of pixels to include on each side of a
            cropped detection
        ignore_exif_rotation (bool, optional): don't rotate the loaded pixels,
            even if we are loading a JPEG and that JPEG says it should be rotated

    Returns:
        PIL.Image.Image: the loaded and modified image
    """

    image = open_image(input_file, ignore_exif_rotation=ignore_exif_rotation)

    # If no classes were supplied, use class 0 for every box (this only impacts
    # color selection)
    if classes is None:
        classes = [0] * len(boxes)

    render_db_bounding_boxes(boxes,
                             classes,
                             image,
                             original_size=None,
                             label_map=label_map,
                             thickness=thickness,
                             expansion=expansion)

    image.save(output_file)

    return image

# ...def draw_db_boxes_on_file(...)
1276
+
1277
+
1278
def gray_scale_fraction(image,crop_size=(0.1,0.1)):
    """
    Computes the fraction of the pixels in [image] that appear to be grayscale (R==G==B),
    useful for approximating whether this is a night-time image when flash information is not
    available in EXIF data (or for video frames, where this information is often not available
    in structured metadata at all).

    Args:
        image (str or PIL.Image.Image): Image, filename, or URL to analyze
        crop_size (tuple of floats, optional): a 2-element list/tuple, representing the fraction of
            the image to crop at the top and bottom, respectively, before analyzing (to minimize
            the possibility of including color elements in the image overlay)

    Returns:
        float: the fraction of pixels in [image] that appear to be grayscale (R==G==B)
    """

    if isinstance(image,str):
        image = Image.open(image)

    # Single-channel images are grayscale by definition
    if image.mode == 'L':
        return 1.0

    if len(image.getbands()) == 1:
        return 1.0

    # Crop the top and bottom if requested; camera info overlays (timestamps,
    # logos) often contain color even in night-time images.
    if crop_size[0] > 0 or crop_size[1] > 0:

        assert (crop_size[0] + crop_size[1]) < 1.0, \
            'Illegal crop size: {}'.format(str(crop_size))

        top_crop_pixels = int(image.height * crop_size[0])
        bottom_crop_pixels = int(image.height * crop_size[1])

        left = 0
        right = image.width

        # Remove pixels from the top
        first_crop_top = top_crop_pixels
        first_crop_bottom = image.height
        first_crop = image.crop((left, first_crop_top, right, first_crop_bottom))

        # Remove pixels from the bottom
        second_crop_top = 0
        second_crop_bottom = first_crop.height - bottom_crop_pixels
        second_crop = first_crop.crop((left, second_crop_top, right, second_crop_bottom))

        image = second_crop

    # It doesn't matter if these are actually R/G/B, they're just names
    r = np.array(image.getchannel(0))
    g = np.array(image.getchannel(1))
    b = np.array(image.getchannel(2))

    # A pixel is "gray" when all three channels carry the same value
    gray_pixels = np.logical_and(r == g, r == b)
    n_pixels = gray_pixels.size
    n_gray_pixels = gray_pixels.sum()

    return n_gray_pixels / n_pixels

    # NOTE: a previously-dead "if False:" block containing a non-numpy
    # re-implementation was removed here; it was unreachable (it followed the
    # return statement) and commented-out/dead code should not linger.

# ...def gray_scale_fraction(...)
1352
+
1353
+
1354
def _resize_relative_image(fn_relative,
                           input_folder,
                           output_folder,
                           target_width,
                           target_height,
                           no_enlarge_width,
                           verbose,
                           quality,
                           overwrite=True):
    """
    Internal function for resizing an image from one folder to another,
    maintaining relative path.
    """

    source_fn_abs = os.path.join(input_folder,fn_relative)
    dest_fn_abs = os.path.join(output_folder,fn_relative)

    # Respect existing output files unless we're allowed to overwrite
    if (not overwrite) and (os.path.isfile(dest_fn_abs)):
        return {'fn_relative':fn_relative,'status':'skipped','error':None}

    os.makedirs(os.path.dirname(dest_fn_abs),exist_ok=True)

    status = 'success'
    error = None

    try:
        _ = resize_image(source_fn_abs,
                         output_file=dest_fn_abs,
                         target_width=target_width,
                         target_height=target_height,
                         no_enlarge_width=no_enlarge_width,
                         verbose=verbose,
                         quality=quality)
    except Exception as e:
        # Per-image failures are reported in the return value rather than raised
        error = str(e)
        status = 'error'
        if verbose:
            print('Error resizing {}: {}'.format(fn_relative,error))

    return {'fn_relative':fn_relative,'status':status,'error':error}

# ...def _resize_relative_image(...)
1396
+
1397
+
1398
def _resize_absolute_image(input_output_files,
                           target_width,
                           target_height,
                           no_enlarge_width,
                           verbose,
                           quality):
    """
    Internal wrapper for resize_image used in the context of a batch resize operation.
    """

    source_fn, dest_fn = input_output_files[0], input_output_files[1]

    os.makedirs(os.path.dirname(dest_fn),exist_ok=True)

    status = 'success'
    error = None

    try:
        _ = resize_image(source_fn,
                         output_file=dest_fn,
                         target_width=target_width,
                         target_height=target_height,
                         no_enlarge_width=no_enlarge_width,
                         verbose=verbose,
                         quality=quality)
    except Exception as e:
        # Per-image failures are reported in the return value rather than raised
        error = str(e)
        status = 'error'
        if verbose:
            print('Error resizing {}: {}'.format(source_fn,error))

    return {'input_fn':source_fn,
            'output_fn':dest_fn,
            'status':status,
            'error':error}

# ..._resize_absolute_image(...)
1433
+
1434
+
1435
def resize_images(input_file_to_output_file,
                  target_width=-1,
                  target_height=-1,
                  no_enlarge_width=False,
                  verbose=False,
                  quality='keep',
                  pool_type='process',
                  n_workers=10):
    """
    Resizes all images in the dictionary [input_file_to_output_file].

    TODO: This is a little more redundant with resize_image_folder than I would like;
    refactor resize_image_folder to call resize_images. Not doing that yet because
    at the time I'm writing this comment, a lot of code depends on resize_image_folder
    and I don't want to rock the boat yet.

    Args:
        input_file_to_output_file (dict): dict mapping images that exist to the locations
            where the resized versions should be written
        target_width (int, optional): width to which we should resize this image, or -1
            to let target_height determine the size
        target_height (int, optional): height to which we should resize this image, or -1
            to let target_width determine the size
        no_enlarge_width (bool, optional): if [no_enlarge_width] is True, and
            [target width] is larger than the original image width, does not modify the image,
            but will write to output_file if supplied
        verbose (bool, optional): enable additional debug output
        quality (str or int, optional): passed to exif_preserving_save, see docs for more detail
        pool_type (str, optional): whether to use processes ('process') or threads ('thread') for
            parallelization; ignored if n_workers <= 1
        n_workers (int, optional): number of workers to use for parallel resizing; set to <=1
            to disable parallelization

    Returns:
        list: a list of dicts with keys 'input_fn', 'output_fn', 'status', and 'error'.
        'status' will be 'success' or 'error'; 'error' will be None for successful cases,
        otherwise will contain the image-specific error.
    """

    assert pool_type in ('process','thread'), 'Illegal pool type {}'.format(pool_type)

    # Reformat input files as (input,output) tuples
    input_output_file_pairs = []
    for input_fn in input_file_to_output_file:
        input_output_file_pairs.append((input_fn,input_file_to_output_file[input_fn]))

    # Use "<= 1" (rather than "== 1") so that n_workers = 0 disables parallelization,
    # as documented, instead of attempting to create an illegal zero-worker pool.
    if n_workers <= 1:

        results = []
        for i_o_file_pair in tqdm(input_output_file_pairs):
            results.append(_resize_absolute_image(i_o_file_pair,
                                                  target_width=target_width,
                                                  target_height=target_height,
                                                  no_enlarge_width=no_enlarge_width,
                                                  verbose=verbose,
                                                  quality=quality))

    else:

        pool = None

        try:

            if pool_type == 'thread':
                pool = ThreadPool(n_workers); poolstring = 'threads'
            else:
                assert pool_type == 'process'
                pool = Pool(n_workers); poolstring = 'processes'

            if verbose:
                print('Starting resizing pool with {} {}'.format(n_workers,poolstring))

            p = partial(_resize_absolute_image,
                        target_width=target_width,
                        target_height=target_height,
                        no_enlarge_width=no_enlarge_width,
                        verbose=verbose,
                        quality=quality)

            results = list(tqdm(pool.imap(p, input_output_file_pairs),
                                total=len(input_output_file_pairs)))

        finally:

            # Always release pool resources, even if resizing raised
            if pool is not None:
                pool.close()
                pool.join()
                print('Pool closed and joined for image resizing')

    return results

# ...def resize_images(...)
1527
+
1528
+
1529
def resize_image_folder(input_folder,
                        output_folder=None,
                        target_width=-1,
                        target_height=-1,
                        no_enlarge_width=False,
                        verbose=False,
                        quality='keep',
                        pool_type='process',
                        n_workers=10,
                        recursive=True,
                        image_files_relative=None,
                        overwrite=True):
    """
    Resize all images in a folder (defaults to recursive).

    Defaults to in-place resizing (output_folder is optional).

    Args:
        input_folder (str): folder in which we should find images to resize
        output_folder (str, optional): folder in which we should write resized images. If
            None, resizes images in place. Otherwise, maintains relative paths in the target
            folder.
        target_width (int, optional): width to which we should resize this image, or -1
            to let target_height determine the size
        target_height (int, optional): height to which we should resize this image, or -1
            to let target_width determine the size
        no_enlarge_width (bool, optional): if [no_enlarge_width] is True, and
            [target width] is larger than the original image width, does not modify the image,
            but will write to output_file if supplied
        verbose (bool, optional): enable additional debug output
        quality (str or int, optional): passed to exif_preserving_save, see docs for more detail
        pool_type (str, optional): whether to use processes ('process') or threads ('thread') for
            parallelization; ignored if n_workers <= 1
        n_workers (int, optional): number of workers to use for parallel resizing; set to <=1
            to disable parallelization
        recursive (bool, optional): whether to search [input_folder] recursively for images.
        image_files_relative (list, optional): if not None, skips any relative paths not
            in this list
        overwrite (bool, optional): whether to overwrite existing target images

    Returns:
        list: a list of dicts with keys 'input_fn', 'output_fn', 'status', and 'error'.
        'status' will be 'success', 'skipped', or 'error'; 'error' will be None for successful
        cases, otherwise will contain the image-specific error.
    """

    assert os.path.isdir(input_folder), '{} is not a folder'.format(input_folder)

    if output_folder is None:
        # In-place resize
        output_folder = input_folder
    else:
        os.makedirs(output_folder,exist_ok=True)

    assert pool_type in ('process','thread'), 'Illegal pool type {}'.format(pool_type)

    if image_files_relative is None:

        if verbose:
            print('Enumerating images')

        image_files_relative = find_images(input_folder,recursive=recursive,
                                           return_relative_paths=True,convert_slashes=True)
        if verbose:
            print('Found {} images'.format(len(image_files_relative)))

    # Use "<= 1" (rather than "== 1") so that n_workers = 0 disables parallelization,
    # as documented, instead of attempting to create an illegal zero-worker pool.
    if n_workers <= 1:

        if verbose:
            print('Resizing images')

        results = []
        for fn_relative in tqdm(image_files_relative):
            results.append(_resize_relative_image(fn_relative,
                                                  input_folder=input_folder,
                                                  output_folder=output_folder,
                                                  target_width=target_width,
                                                  target_height=target_height,
                                                  no_enlarge_width=no_enlarge_width,
                                                  verbose=verbose,
                                                  quality=quality,
                                                  overwrite=overwrite))

    else:

        pool = None

        try:

            if pool_type == 'thread':
                pool = ThreadPool(n_workers); poolstring = 'threads'
            else:
                assert pool_type == 'process'
                pool = Pool(n_workers); poolstring = 'processes'

            if verbose:
                print('Starting resizing pool with {} {}'.format(n_workers,poolstring))

            p = partial(_resize_relative_image,
                        input_folder=input_folder,
                        output_folder=output_folder,
                        target_width=target_width,
                        target_height=target_height,
                        no_enlarge_width=no_enlarge_width,
                        verbose=verbose,
                        quality=quality,
                        overwrite=overwrite)

            results = list(tqdm(pool.imap(p, image_files_relative),
                                total=len(image_files_relative)))

        finally:

            # This pool was previously never closed/joined; release it here
            # (consistent with resize_images) so worker processes don't linger.
            if pool is not None:
                pool.close()
                pool.join()
                if verbose:
                    print('Pool closed and joined for image resizing')

    return results

# ...def resize_image_folder(...)
1638
+
1639
+
1640
def get_image_size(im,verbose=False):
    """
    Retrieve the size of an image. Returns None if the image fails to load.

    Args:
        im (str or PIL.Image): filename or PIL image
        verbose (bool, optional): enable additional debug output

    Returns:
        tuple (w,h), or None if the image fails to load.
    """

    image_name = '[in memory]'

    # Load the image if we were handed a filename; any failure (load error,
    # missing attributes) maps to a None return.
    try:
        if isinstance(im,str):
            image_name = im
            im = load_image(im)
        size = (im.width,im.height)
    except Exception as e:
        if verbose:
            print('Error reading width from image {}: {}'.format(
                image_name,str(e)))
        return None

    # Treat non-positive dimensions as a failed read
    if (size[0] <= 0) or (size[1] <= 0):
        if verbose:
            print('Error reading width from image {}: {},{}'.format(
                image_name,size[0],size[1]))
        return None

    return size

# ...def get_image_size(...)
1673
+
1674
+
1675
def parallel_get_image_sizes(filenames,
                             max_workers=16,
                             use_threads=True,
                             recursive=True,
                             verbose=False):
    """
    Retrieve image sizes for a list or folder of images

    Args:
        filenames (list or str): a list of image filenames or a folder. Non-image files and
            unreadable images will be returned with an image size of None.
        max_workers (int, optional): the number of parallel workers to use; set to <=1 to disable
            parallelization
        use_threads (bool, optional): whether to use threads (True) or processes (False) for
            parallelization
        recursive (bool, optional): if [filenames] is a folder, whether to search recursively
            for images. Ignored if [filenames] is a list.
        verbose (bool, optional): enable additional debug output

    Returns:
        dict: a dict mapping filenames to (w,h) tuples; the value will be None for images that fail
        to load. Filenames will always be absolute.
    """

    # A string argument is interpreted as a folder to enumerate
    if isinstance(filenames,str) and os.path.isdir(filenames):
        if verbose:
            print('Enumerating images in {}'.format(filenames))
        filenames = find_images(filenames,recursive=recursive,return_relative_paths=False)

    n_workers = min(max_workers,len(filenames))

    if verbose:
        print('Getting image sizes for {} images'.format(len(filenames)))

    if n_workers <= 1:

        results = []
        for filename in filenames:
            results.append(get_image_size(filename,verbose=verbose))

    else:

        if use_threads:
            pool = ThreadPool(n_workers)
        else:
            pool = Pool(n_workers)

        try:
            results = list(tqdm(pool.imap(
                partial(get_image_size,verbose=verbose),filenames), total=len(filenames)))
        finally:
            pool.close()
            pool.join()
            # This message was previously printed unconditionally; gate it on
            # [verbose] for consistency with the rest of this function's output.
            if verbose:
                print('Pool closed and joined for image size retrieval')

    assert len(filenames) == len(results), 'Internal error in parallel_get_image_sizes'

    # imap preserves input order, so results[i] corresponds to filenames[i]
    to_return = {}
    for i_file,filename in enumerate(filenames):
        to_return[filename] = results[i_file]

    return to_return
1737
+
1738
+
1739
+ #%% Image integrity checking functions
1740
+
1741
def check_image_integrity(filename,modes=None):
    """
    Check whether we can successfully load an image via OpenCV and/or PIL.

    Args:
        filename (str): the filename to evaluate
        modes (list, optional): a list containing one or more of:

            - 'cv'
            - 'pil'
            - 'skimage'
            - 'jpeg_trailer'

            'jpeg_trailer' checks that the binary data ends with ffd9. It does not check whether
            the image is actually a jpeg, and even if it is, there are lots of reasons the image might not
            end with ffd9. It's also true the JPEGs that cause "premature end of jpeg segment" issues
            don't end with ffd9, so this may be a useful diagnostic. High precision, very low recall
            for corrupt jpegs.

            Set to None to use all modes.

    Returns:
        dict: a dict with a key called 'file' (the value of [filename]), one key for each string in
        [modes] (a success indicator for that mode, specifically a string starting with either
        'success' or 'error').
    """

    if modes is None:
        modes = ('cv','pil','skimage','jpeg_trailer')
    else:
        if isinstance(modes,str):
            modes = [modes]
        for mode in modes:
            # 'jpeg_trailer' was previously missing from this list, so explicitly
            # requesting a mode that the loop below supports raised an assertion
            assert mode in ('cv','pil','skimage','jpeg_trailer'), \
                'Unrecognized mode {}'.format(mode)

    assert os.path.isfile(filename), 'Could not find file {}'.format(filename)

    result = {}
    result['file'] = filename

    for mode in modes:

        result[mode] = 'unknown'
        if mode == 'pil':
            try:
                pil_im = load_image(filename) # noqa
                assert pil_im is not None
                result[mode] = 'success'
            except Exception as e:
                result[mode] = 'error: {}'.format(str(e))
        elif mode == 'cv':
            try:
                cv_im = cv2.imread(filename)
                assert cv_im is not None, 'Unknown opencv read failure'
                numpy_im = np.asarray(cv_im) # noqa
                result[mode] = 'success'
            except Exception as e:
                result[mode] = 'error: {}'.format(str(e))
        elif mode == 'skimage':
            try:
                # This is not a standard dependency
                from skimage import io as skimage_io # type: ignore # noqa
            except Exception:
                result[mode] = 'could not import skimage, run pip install scikit-image'
                # Previously this returned early, silently dropping results for any
                # modes after 'skimage'; continue so the remaining modes still run.
                continue
            try:
                skimage_im = skimage_io.imread(filename) # noqa
                assert skimage_im is not None
                result[mode] = 'success'
            except Exception as e:
                result[mode] = 'error: {}'.format(str(e))
        elif mode == 'jpeg_trailer':
            # https://stackoverflow.com/a/48282863/16644970
            try:
                with open(filename, 'rb') as f:
                    check_chars = f.read()[-2:]
                if check_chars != b'\xff\xd9':
                    result[mode] = 'invalid jpeg trailer: {}'.format(str(check_chars))
                else:
                    result[mode] = 'success'
            except Exception as e:
                result[mode] = 'error: {}'.format(str(e))

    # ...for each mode

    return result

# ...def check_image_integrity(...)
1829
+
1830
+
1831
def parallel_check_image_integrity(filenames,
                                   modes=None,
                                   max_workers=16,
                                   use_threads=True,
                                   recursive=True,
                                   verbose=False):
    """
    Check whether we can successfully load a list of images via OpenCV and/or PIL.

    Args:
        filenames (list or str): a list of image filenames or a folder
        modes (list, optional): see check_image_integrity() for documentation on the [modes] parameter
        max_workers (int, optional): the number of parallel workers to use; set to <=1 to disable
            parallelization
        use_threads (bool, optional): whether to use threads (True) or processes (False) for
            parallelization
        recursive (bool, optional): if [filenames] is a folder, whether to search recursively for images.
            Ignored if [filenames] is a list.
        verbose (bool, optional): enable additional debug output

    Returns:
        list: a list of dicts, each with a key called 'file' (the value of [filename]), one key for
        each string in [modes] (a success indicator for that mode, specifically a string starting
        with either 'success' or 'error').
    """

    # A string argument is interpreted as a folder to enumerate
    if isinstance(filenames,str) and os.path.isdir(filenames):
        if verbose:
            print('Enumerating images in {}'.format(filenames))
        filenames = find_images(filenames,recursive=recursive,return_relative_paths=False)

    n_workers = min(max_workers,len(filenames))

    if verbose:
        print('Checking image integrity for {} filenames'.format(len(filenames)))

    if n_workers <= 1:

        results = []
        for filename in filenames:
            results.append(check_image_integrity(filename,modes=modes))

    else:

        if use_threads:
            pool = ThreadPool(n_workers)
        else:
            pool = Pool(n_workers)

        try:
            results = list(tqdm(pool.imap(
                partial(check_image_integrity,modes=modes),filenames), total=len(filenames)))
        finally:
            # This pool was previously never closed/joined; release it here
            # (consistent with parallel_get_image_sizes) so workers don't linger.
            pool.close()
            pool.join()
            if verbose:
                print('Pool closed and joined for image integrity checking')

    return results
1884
+
1885
+
1886
+ #%% Test drivers
1887
+
1888
if False:

    #%% Text rendering tests

    # Interactive scratch cells: the "if False" guard means none of this runs on
    # import; the #%% markers are for cell-by-cell execution in an IDE.
    # Paths below are hard-coded to a local machine; adjust before running.

    import os # noqa
    import numpy as np # noqa
    from megadetector.visualization.visualization_utils import \
        draw_bounding_boxes_on_image, exif_preserving_save, load_image, \
        TEXTALIGN_LEFT,TEXTALIGN_RIGHT,VTEXTALIGN_BOTTOM,VTEXTALIGN_TOP, \
        DEFAULT_LABEL_FONT_SIZE

    fn = os.path.expanduser('~/AppData/Local/Temp/md-tests/md-test-images/ena24_7904.jpg')
    output_fn = r'g:\temp\test.jpg'

    image = load_image(fn)

    # Four boxes in normalized coordinates, one per region of the image
    w = 0.2; h = 0.2
    all_boxes = [[0.05, 0.05, 0.25, 0.25],
                 [0.05, 0.35, 0.25, 0.6],
                 [0.35, 0.05, 0.6, 0.25],
                 [0.35, 0.35, 0.6, 0.6]]

    # One (horizontal, vertical) text alignment combination per box, matching
    # the label names below
    alignments = [
        [TEXTALIGN_LEFT,VTEXTALIGN_TOP],
        [TEXTALIGN_LEFT,VTEXTALIGN_BOTTOM],
        [TEXTALIGN_RIGHT,VTEXTALIGN_TOP],
        [TEXTALIGN_RIGHT,VTEXTALIGN_BOTTOM]
    ]

    labels = ['left_top','left_bottom','right_top','right_bottom']

    # Rotated, duplicated labels to exercise multi-line rotated text rendering
    text_rotation = -90
    n_label_copies = 2

    # Draw each box (with its label repeated n_label_copies times) onto [image]
    for i_box,box in enumerate(all_boxes):

        boxes = [box]
        boxes = np.array(boxes)
        classes = [i_box]
        display_strs = [[labels[i_box]]*n_label_copies]
        textalign = alignments[i_box][0]
        vtextalign = alignments[i_box][1]
        draw_bounding_boxes_on_image(image,
                                     boxes,
                                     classes,
                                     thickness=2,
                                     expansion=0,
                                     display_strs=display_strs,
                                     colormap=None,
                                     textalign=textalign,
                                     vtextalign=vtextalign,
                                     label_font_size=DEFAULT_LABEL_FONT_SIZE,
                                     text_rotation=text_rotation)

    # Save and open the annotated image for visual inspection
    exif_preserving_save(image,output_fn)
    from megadetector.utils.path_utils import open_file
    open_file(output_fn)


    #%% Recursive resize test

    from megadetector.visualization.visualization_utils import resize_image_folder # noqa

    input_folder = r"C:\temp\resize-test\in"
    output_folder = r"C:\temp\resize-test\out"

    resize_results = resize_image_folder(input_folder,output_folder,
        target_width=1280,verbose=True,quality=85,no_enlarge_width=True,
        pool_type='process',n_workers=10)


    #%% Integrity checking test

    # Download the standard MD test images and run all integrity-check modes on them
    from megadetector.utils import md_tests
    options = md_tests.download_test_data()
    folder = options.scratch_dir

    results = parallel_check_image_integrity(folder,max_workers=8)

    modes = ['cv','pil','skimage','jpeg_trailer']

    # Print a summary of every (file, mode) pair that failed
    for r in results:
        for mode in modes:
            if r[mode] != 'success':
                s = r[mode]
                print('Mode {} failed for {}:\n{}\n'.format(mode,r['file'],s))