megadetector 10.0.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of megadetector might be problematic. Click here for more details.
- megadetector/__init__.py +0 -0
- megadetector/api/__init__.py +0 -0
- megadetector/api/batch_processing/integration/digiKam/setup.py +6 -0
- megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +465 -0
- megadetector/api/batch_processing/integration/eMammal/test_scripts/config_template.py +5 -0
- megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +125 -0
- megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +55 -0
- megadetector/classification/__init__.py +0 -0
- megadetector/classification/aggregate_classifier_probs.py +108 -0
- megadetector/classification/analyze_failed_images.py +227 -0
- megadetector/classification/cache_batchapi_outputs.py +198 -0
- megadetector/classification/create_classification_dataset.py +626 -0
- megadetector/classification/crop_detections.py +516 -0
- megadetector/classification/csv_to_json.py +226 -0
- megadetector/classification/detect_and_crop.py +853 -0
- megadetector/classification/efficientnet/__init__.py +9 -0
- megadetector/classification/efficientnet/model.py +415 -0
- megadetector/classification/efficientnet/utils.py +608 -0
- megadetector/classification/evaluate_model.py +520 -0
- megadetector/classification/identify_mislabeled_candidates.py +152 -0
- megadetector/classification/json_to_azcopy_list.py +63 -0
- megadetector/classification/json_validator.py +696 -0
- megadetector/classification/map_classification_categories.py +276 -0
- megadetector/classification/merge_classification_detection_output.py +509 -0
- megadetector/classification/prepare_classification_script.py +194 -0
- megadetector/classification/prepare_classification_script_mc.py +228 -0
- megadetector/classification/run_classifier.py +287 -0
- megadetector/classification/save_mislabeled.py +110 -0
- megadetector/classification/train_classifier.py +827 -0
- megadetector/classification/train_classifier_tf.py +725 -0
- megadetector/classification/train_utils.py +323 -0
- megadetector/data_management/__init__.py +0 -0
- megadetector/data_management/animl_to_md.py +161 -0
- megadetector/data_management/annotations/__init__.py +0 -0
- megadetector/data_management/annotations/annotation_constants.py +33 -0
- megadetector/data_management/camtrap_dp_to_coco.py +270 -0
- megadetector/data_management/cct_json_utils.py +566 -0
- megadetector/data_management/cct_to_md.py +184 -0
- megadetector/data_management/cct_to_wi.py +293 -0
- megadetector/data_management/coco_to_labelme.py +284 -0
- megadetector/data_management/coco_to_yolo.py +702 -0
- megadetector/data_management/databases/__init__.py +0 -0
- megadetector/data_management/databases/add_width_and_height_to_db.py +107 -0
- megadetector/data_management/databases/combine_coco_camera_traps_files.py +210 -0
- megadetector/data_management/databases/integrity_check_json_db.py +528 -0
- megadetector/data_management/databases/subset_json_db.py +195 -0
- megadetector/data_management/generate_crops_from_cct.py +200 -0
- megadetector/data_management/get_image_sizes.py +164 -0
- megadetector/data_management/labelme_to_coco.py +559 -0
- megadetector/data_management/labelme_to_yolo.py +349 -0
- megadetector/data_management/lila/__init__.py +0 -0
- megadetector/data_management/lila/create_lila_blank_set.py +556 -0
- megadetector/data_management/lila/create_lila_test_set.py +187 -0
- megadetector/data_management/lila/create_links_to_md_results_files.py +106 -0
- megadetector/data_management/lila/download_lila_subset.py +182 -0
- megadetector/data_management/lila/generate_lila_per_image_labels.py +777 -0
- megadetector/data_management/lila/get_lila_annotation_counts.py +174 -0
- megadetector/data_management/lila/get_lila_image_counts.py +112 -0
- megadetector/data_management/lila/lila_common.py +319 -0
- megadetector/data_management/lila/test_lila_metadata_urls.py +164 -0
- megadetector/data_management/mewc_to_md.py +344 -0
- megadetector/data_management/ocr_tools.py +873 -0
- megadetector/data_management/read_exif.py +964 -0
- megadetector/data_management/remap_coco_categories.py +195 -0
- megadetector/data_management/remove_exif.py +156 -0
- megadetector/data_management/rename_images.py +194 -0
- megadetector/data_management/resize_coco_dataset.py +663 -0
- megadetector/data_management/speciesnet_to_md.py +41 -0
- megadetector/data_management/wi_download_csv_to_coco.py +247 -0
- megadetector/data_management/yolo_output_to_md_output.py +594 -0
- megadetector/data_management/yolo_to_coco.py +876 -0
- megadetector/data_management/zamba_to_md.py +188 -0
- megadetector/detection/__init__.py +0 -0
- megadetector/detection/change_detection.py +840 -0
- megadetector/detection/process_video.py +479 -0
- megadetector/detection/pytorch_detector.py +1451 -0
- megadetector/detection/run_detector.py +1267 -0
- megadetector/detection/run_detector_batch.py +2159 -0
- megadetector/detection/run_inference_with_yolov5_val.py +1314 -0
- megadetector/detection/run_md_and_speciesnet.py +1494 -0
- megadetector/detection/run_tiled_inference.py +1038 -0
- megadetector/detection/tf_detector.py +209 -0
- megadetector/detection/video_utils.py +1379 -0
- megadetector/postprocessing/__init__.py +0 -0
- megadetector/postprocessing/add_max_conf.py +72 -0
- megadetector/postprocessing/categorize_detections_by_size.py +166 -0
- megadetector/postprocessing/classification_postprocessing.py +1752 -0
- megadetector/postprocessing/combine_batch_outputs.py +249 -0
- megadetector/postprocessing/compare_batch_results.py +2110 -0
- megadetector/postprocessing/convert_output_format.py +403 -0
- megadetector/postprocessing/create_crop_folder.py +629 -0
- megadetector/postprocessing/detector_calibration.py +570 -0
- megadetector/postprocessing/generate_csv_report.py +522 -0
- megadetector/postprocessing/load_api_results.py +223 -0
- megadetector/postprocessing/md_to_coco.py +428 -0
- megadetector/postprocessing/md_to_labelme.py +351 -0
- megadetector/postprocessing/md_to_wi.py +41 -0
- megadetector/postprocessing/merge_detections.py +392 -0
- megadetector/postprocessing/postprocess_batch_results.py +2077 -0
- megadetector/postprocessing/remap_detection_categories.py +226 -0
- megadetector/postprocessing/render_detection_confusion_matrix.py +677 -0
- megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +206 -0
- megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +82 -0
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +1665 -0
- megadetector/postprocessing/separate_detections_into_folders.py +795 -0
- megadetector/postprocessing/subset_json_detector_output.py +964 -0
- megadetector/postprocessing/top_folders_to_bottom.py +238 -0
- megadetector/postprocessing/validate_batch_results.py +332 -0
- megadetector/taxonomy_mapping/__init__.py +0 -0
- megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +491 -0
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +213 -0
- megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +165 -0
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +543 -0
- megadetector/taxonomy_mapping/retrieve_sample_image.py +71 -0
- megadetector/taxonomy_mapping/simple_image_download.py +224 -0
- megadetector/taxonomy_mapping/species_lookup.py +1008 -0
- megadetector/taxonomy_mapping/taxonomy_csv_checker.py +159 -0
- megadetector/taxonomy_mapping/taxonomy_graph.py +346 -0
- megadetector/taxonomy_mapping/validate_lila_category_mappings.py +83 -0
- megadetector/tests/__init__.py +0 -0
- megadetector/tests/test_nms_synthetic.py +335 -0
- megadetector/utils/__init__.py +0 -0
- megadetector/utils/ct_utils.py +1857 -0
- megadetector/utils/directory_listing.py +199 -0
- megadetector/utils/extract_frames_from_video.py +307 -0
- megadetector/utils/gpu_test.py +125 -0
- megadetector/utils/md_tests.py +2072 -0
- megadetector/utils/path_utils.py +2832 -0
- megadetector/utils/process_utils.py +172 -0
- megadetector/utils/split_locations_into_train_val.py +237 -0
- megadetector/utils/string_utils.py +234 -0
- megadetector/utils/url_utils.py +825 -0
- megadetector/utils/wi_platform_utils.py +968 -0
- megadetector/utils/wi_taxonomy_utils.py +1759 -0
- megadetector/utils/write_html_image_list.py +239 -0
- megadetector/visualization/__init__.py +0 -0
- megadetector/visualization/plot_utils.py +309 -0
- megadetector/visualization/render_images_with_thumbnails.py +243 -0
- megadetector/visualization/visualization_utils.py +1940 -0
- megadetector/visualization/visualize_db.py +630 -0
- megadetector/visualization/visualize_detector_output.py +479 -0
- megadetector/visualization/visualize_video_output.py +705 -0
- megadetector-10.0.13.dist-info/METADATA +134 -0
- megadetector-10.0.13.dist-info/RECORD +147 -0
- megadetector-10.0.13.dist-info/WHEEL +5 -0
- megadetector-10.0.13.dist-info/licenses/LICENSE +19 -0
- megadetector-10.0.13.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,1940 @@
|
|
|
1
|
+
"""
|
|
2
|
+
|
|
3
|
+
visualization_utils.py
|
|
4
|
+
|
|
5
|
+
Rendering functions shared across visualization scripts
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
#%% Constants and imports
|
|
10
|
+
|
|
11
|
+
import time
|
|
12
|
+
import numpy as np
|
|
13
|
+
import requests
|
|
14
|
+
import os
|
|
15
|
+
import cv2
|
|
16
|
+
|
|
17
|
+
from io import BytesIO
|
|
18
|
+
from PIL import Image, ImageFile, ImageFont, ImageDraw, ImageFilter
|
|
19
|
+
from multiprocessing.pool import ThreadPool
|
|
20
|
+
from multiprocessing.pool import Pool
|
|
21
|
+
from tqdm import tqdm
|
|
22
|
+
from functools import partial
|
|
23
|
+
|
|
24
|
+
from megadetector.utils.path_utils import find_images
|
|
25
|
+
from megadetector.data_management.annotations import annotation_constants
|
|
26
|
+
from megadetector.data_management.annotations.annotation_constants import \
|
|
27
|
+
detector_bbox_category_id_to_name
|
|
28
|
+
from megadetector.utils.ct_utils import sort_list_of_dicts_by_key
|
|
29
|
+
|
|
30
|
+
# Ask PIL to load truncated image files rather than failing on them
ImageFile.LOAD_TRUNCATED_IMAGES = True

# Maps EXIF standard rotation identifiers to degrees.  The value "1" indicates no
# rotation; this will be ignored.  The values 2, 4, 5, and 7 are mirrored rotations,
# which are not supported (we'll assert() on this when we apply rotations).
#
# The degree values are passed directly to PIL's Image.rotate() in open_image().
EXIF_IMAGE_NO_ROTATION = 1
EXIF_IMAGE_ROTATIONS = {
    3: 180,
    6: 270,
    8: 90
}

# Horizontal text alignment options for rendered labels
TEXTALIGN_LEFT = 0
TEXTALIGN_RIGHT = 1
TEXTALIGN_CENTER = 2

# Vertical text alignment options for rendered labels
VTEXTALIGN_TOP = 0
VTEXTALIGN_BOTTOM = 1

# Default detection label map; the same as detector_bbox_category_id_to_name, but with
# category IDs converted from int to str
DEFAULT_DETECTOR_LABEL_MAP = {
    str(k): v for k, v in detector_bbox_category_id_to_name.items()
}

# Constants controlling retry behavior when fetching images from URLs: the maximum
# number of retries, and the time to sleep (passed to time.sleep()) before each retry
n_retries = 10
retry_sleep_time = 0.01

# If we try to open an image from a URL, and we encounter any error in this list,
# we'll retry, otherwise it's just an error.  Entries are compared against the
# exception's class name.
error_names_for_retry = ['ConnectionError']

# Default line thickness (in pixels) and label font size used when rendering boxes
DEFAULT_BOX_THICKNESS = 4
DEFAULT_LABEL_FONT_SIZE = 16

# Default color map for mapping integer category IDs to colors when rendering bounding
# boxes
DEFAULT_COLORS = [
    'AliceBlue', 'Red', 'RoyalBlue', 'Gold', 'Chartreuse', 'Aqua', 'Azure',
    'Beige', 'Bisque', 'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue',
    'AntiqueWhite', 'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson',
    'Cyan', 'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange',
    'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet',
    'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite',
    'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'GoldenRod',
    'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 'Ivory', 'Khaki',
    'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 'LightBlue',
    'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey',
    'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue',
    'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime',
    'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid',
    'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen',
    'MediumTurquoise', 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin',
    'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed',
    'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed',
    'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple',
    'RosyBrown', 'Aquamarine', 'SaddleBrown', 'Green', 'SandyBrown',
    'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue',
    'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue', 'GreenYellow',
    'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', 'Wheat', 'White',
    'WhiteSmoke', 'Yellow', 'YellowGreen'
]
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
#%% Functions
|
|
95
|
+
|
|
96
|
+
def open_image(input_file, ignore_exif_rotation=False):
    """
    Opens an image in binary format using PIL.Image and converts to RGB mode.

    Supports local files or URLs.

    This operation is lazy; image will not be actually loaded until the first
    operation that needs to load it (for example, resizing), so file opening
    errors can show up later.  load_image() is the non-lazy version of this function.

    Args:
        input_file (str or BytesIO): can be a path to an image file (anything
            that PIL can open), a URL, or an image as a stream of bytes
        ignore_exif_rotation (bool, optional): don't rotate the loaded pixels,
            even if we are loading a JPEG and that JPEG says it should be rotated

    Returns:
        PIL.Image.Image: A PIL Image object.  RGBA and L images are converted to RGB;
        RGB and I;16 images are returned in their original mode.

    Raises:
        AttributeError: if the image uses a mode other than RGBA, RGB, L, or I;16
    """

    if (isinstance(input_file, str)
            and input_file.startswith(('http://', 'https://'))):

        try:
            response = requests.get(input_file)
        except Exception as e:
            print(f'Error retrieving image {input_file}: {e}')
            success = False

            # Only errors whose class name appears in error_names_for_retry are retried
            if e.__class__.__name__ in error_names_for_retry:
                for i_retry in range(0,n_retries):
                    try:
                        time.sleep(retry_sleep_time)
                        response = requests.get(input_file)
                    except Exception as retry_error:
                        # Use a distinct name so we don't shadow the original error
                        print(f'Error retrieving image {input_file} on retry {i_retry}: {retry_error}')
                        continue
                    print('Succeeded on retry {}'.format(i_retry))
                    success = True
                    break

            # Re-raise the original download error if we never got a response
            if not success:
                raise

        try:
            image = Image.open(BytesIO(response.content))
        except Exception as e:
            print(f'Error opening image {input_file}: {e}')
            raise

    else:

        image = Image.open(input_file)

    # Read the EXIF orientation (tag 0x112 / 274) *before* any mode conversion.
    # convert() returns a plain Image object that no longer exposes _getexif(), so
    # reading the tag after conversion would silently skip rotation for L/RGBA sources.
    #
    # https://gist.github.com/dangtrinhnt/a577ece4cbe5364aad28
    # https://www.media.mit.edu/pia/Research/deepview/exif.html
    orientation = None
    if not ignore_exif_rotation:
        try:
            exif = image._getexif()
            if exif is not None:
                orientation = exif.get(274, None)
        except Exception:
            # Best-effort: image types without EXIF support, or with unreadable EXIF
            # data, are treated as unrotated
            orientation = None

    # Convert to RGB if necessary
    if image.mode not in ('RGBA', 'RGB', 'L', 'I;16'):
        raise AttributeError(
            f'Image {input_file} uses unsupported mode {image.mode}')
    if image.mode == 'RGBA' or image.mode == 'L':
        # PIL.Image.convert() returns a converted copy of this image
        image = image.convert(mode='RGB')

    if (orientation is not None) and (orientation != EXIF_IMAGE_NO_ROTATION):
        try:
            # Mirrored orientations (2, 4, 5, 7) are not supported; as before, they
            # leave the pixels untouched rather than raising
            if orientation in EXIF_IMAGE_ROTATIONS:
                image = image.rotate(EXIF_IMAGE_ROTATIONS[orientation], expand=True)
        except Exception:
            # Rotation remains best-effort; any decoding problem will surface when
            # the caller loads the image
            pass

    return image
|
|
170
|
+
|
|
171
|
+
# ...def open_image(...)
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def exif_preserving_save(pil_image,output_file,quality='keep',default_quality=85,verbose=False):
    """
    Writes [pil_image] to [output_file], trying (without guarantees) to carry over
    the source image's EXIF block and JPEG quality.

    Also see:

    https://discuss.dizzycoding.com/determining-jpg-quality-in-python-pil/

    ...for more ways to preserve jpeg quality if quality='keep' doesn't do the trick.

    Args:
        pil_image (Image): the PIL Image object to save
        output_file (str): the destination file
        quality (str or int, optional): can be "keep" (default), or an integer from 0 to 100.
            "keep" is only honored if PIL thinks the source image is a JPEG.  If you load
            a JPEG and resize it in memory, for example, it's no longer a JPEG.
        default_quality (int, optional): determines output quality when quality == 'keep' and we are
            saving a non-JPEG source to a JPEG file
        verbose (bool, optional): enable additional debug console output
    """

    # EXIF bytes from the source image, if any; "exif=None" is not accepted by save(),
    # so the argument is only passed along when we actually have data
    exif = pil_image.info.get('exif')
    exif_args = {} if exif is None else {'exif': exif}

    # Quality preservation is only supported for JPEG sources, so fall back to
    # [default_quality] when 'keep' is requested for anything else
    source_is_jpeg = (pil_image.format == "JPEG")
    if (not source_is_jpeg) and (quality == 'keep'):
        if verbose:
            print('Warning: quality "keep" passed when saving a non-JPEG source (during save to {})'.format(
                output_file))
        quality = default_quality

    # Some output formats don't support the quality parameter, so we try once with
    # it, and once without
    try:
        pil_image.save(output_file, **exif_args, quality=quality)
    except Exception:
        if verbose:
            print('Warning: failed to write {}, trying again without quality parameter'.format(output_file))
        pil_image.save(output_file, **exif_args)
|
|
226
|
+
|
|
227
|
+
# ...def exif_preserving_save(...)
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def load_image(input_file, ignore_exif_rotation=False):
    """
    Eagerly loads an image file.  This is the non-lazy counterpart of open_image():
    it opens the image and then forces PIL to decode it before returning, so
    decoding errors are raised here rather than on first use.

    Args:
        input_file (str or BytesIO): can be a path to an image file (anything
            that PIL can open), a URL, or an image as a stream of bytes
        ignore_exif_rotation (bool, optional): don't rotate the loaded pixels,
            even if we are loading a JPEG and that JPEG says it should be rotated

    Returns:
        PIL.Image.Image: the decoded PIL Image object returned by open_image()
    """

    decoded_image = open_image(input_file, ignore_exif_rotation=ignore_exif_rotation)

    # Force decoding now
    decoded_image.load()

    return decoded_image
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
def resize_image(image,
                 target_width=-1,
                 target_height=-1,
                 output_file=None,
                 no_enlarge_width=False,
                 verbose=False,
                 quality='keep'):
    """
    Resizes a PIL Image object to the specified width and height; does not resize
    in place. If either width or height are -1, resizes with aspect ratio preservation.

    If target_width and target_height are both -1, does not modify the image, but
    will write to output_file if supplied.

    If no resizing is required, and an Image object is supplied, returns the original Image
    object (i.e., does not copy).

    Args:
        image (Image or str): PIL Image object or a filename (local file or URL)
        target_width (int, optional): width to which we should resize this image, or -1
            (or None) to let target_height determine the size
        target_height (int, optional): height to which we should resize this image, or -1
            (or None) to let target_width determine the size
        output_file (str, optional): file to which we should save this image; if None,
            just returns the image without saving
        no_enlarge_width (bool, optional): if [no_enlarge_width] is True, and
            [target width] is larger than the original image width, does not modify the image,
            but will write to output_file if supplied
        verbose (bool, optional): enable additional debug output
        quality (str or int, optional): passed to exif_preserving_save, see docs for more detail

    Returns:
        PIL.Image.Image: the resized image, which may be the original image if no resizing is
        required

    Raises:
        AssertionError: if a resize is required and either target dimension is not positive
    """

    image_fn = 'in_memory'
    if isinstance(image,str):
        image_fn = image
        image = load_image(image)

    # None is treated the same as -1 ("derive this dimension")
    if target_width is None:
        target_width = -1

    if target_height is None:
        target_height = -1

    resize_required = True

    # No resize was requested, this is always a no-op
    if target_width == -1 and target_height == -1:

        resize_required = False

    # Does either dimension need to scale according to the other?
    elif target_width == -1 or target_height == -1:

        # Aspect ratio as width over height
        # ar = w / h
        aspect_ratio = image.size[0] / image.size[1]

        # The derived dimension is clamped to at least one pixel, so that images
        # with extreme aspect ratios don't truncate to an invalid size of zero
        if target_width != -1:
            # h = w / ar
            target_height = max(1, int(target_width / aspect_ratio))
        else:
            # w = ar * h
            target_width = max(1, int(aspect_ratio * target_height))

    # If we're not enlarging images and this would be an enlarge operation
    if (no_enlarge_width) and (target_width > image.size[0]):

        if verbose:
            print('Bypassing image enlarge for {} --> {}'.format(
                image_fn,str(output_file)))
        resize_required = False

    # If the target size is the same as the original size
    if (target_width == image.size[0]) and (target_height == image.size[1]):

        resize_required = False

    if not resize_required:

        if output_file is not None:
            if verbose:
                print('No resize required for resize {} --> {}'.format(
                    image_fn,str(output_file)))
            exif_preserving_save(image,output_file,quality=quality,verbose=verbose)
        return image

    assert target_width > 0 and target_height > 0, \
        'Invalid image resize target {},{}'.format(target_width,target_height)

    # The name of the antialiasing filter changed between Pillow versions 9 and 10
    # (Image.ANTIALIAS was removed in favor of Image.Resampling.LANCZOS).  Pick the
    # filter explicitly, rather than catching every exception from resize(), so that
    # genuine resize errors are not masked and retried.
    resampling_namespace = getattr(Image, 'Resampling', None)
    if resampling_namespace is not None:
        lanczos_filter = resampling_namespace.LANCZOS
    else:
        lanczos_filter = Image.ANTIALIAS

    resized_image = image.resize((target_width, target_height), lanczos_filter)

    if output_file is not None:
        exif_preserving_save(resized_image,output_file,quality=quality,verbose=verbose)

    return resized_image
|
|
354
|
+
|
|
355
|
+
# ...def resize_image(...)
|
|
356
|
+
|
|
357
|
+
|
|
358
|
+
def crop_image(detections, image, confidence_threshold=0.15, expansion=0):
    """
    Crops detections at or above [confidence_threshold] from the PIL image [image],
    returning a list of PIL Images, preserving the order of [detections].

    Args:
        detections (list): a list of dictionaries with keys 'conf' and 'bbox';
            boxes are length-four arrays formatted as [x,y,w,h], normalized,
            upper-left origin (this is the standard MD detection format)
        image (Image or str): the PIL Image object from which we should crop detections,
            or an image filename
        confidence_threshold (float, optional): only crop detections whose confidence
            is at least this value; if None, all detections are cropped
        expansion (int, optional): a number of pixels to include on each side of a cropped
            detection; values <= 0 are ignored

    Returns:
        list: a possibly-empty list of PIL Image objects
    """

    ret_images = []

    if isinstance(image,str):
        image = load_image(image)

    im_width, im_height = image.size

    for detection in detections:

        score = float(detection['conf'])

        if (confidence_threshold is not None) and (score < confidence_threshold):
            continue

        x1, y1, w_box, h_box = detection['bbox']
        ymin,xmin,ymax,xmax = y1, x1, y1 + h_box, x1 + w_box

        # Convert to pixels so we can use the PIL crop() function
        (left, right, top, bottom) = (xmin * im_width, xmax * im_width,
                                      ymin * im_height, ymax * im_height)

        if expansion > 0:
            left -= expansion
            right += expansion
            top -= expansion
            bottom += expansion

        # PIL's crop() does surprising things if you provide values outside of
        # the image, clip inputs.
        #
        # The right and bottom edges of a PIL crop box are exclusive, so they are
        # clipped to the image width/height (not width-1/height-1); otherwise boxes
        # touching the right or bottom edge would lose their last column/row.
        left = min(max(left,0),im_width-1)
        top = min(max(top,0),im_height-1)
        right = min(max(right,0),im_width)
        bottom = min(max(bottom,0),im_height)

        ret_images.append(image.crop((left, top, right, bottom)))

    # ...for each detection

    return ret_images
|
|
417
|
+
|
|
418
|
+
# ...def crop_image(...)
|
|
419
|
+
|
|
420
|
+
|
|
421
|
+
def blur_detections(image,detections,blur_radius=40):
    """
    Blur the regions in [image] corresponding to the MD-formatted list [detections].
    [image] is modified in place.

    Detections whose box is empty after conversion to pixels and clipping to the
    image bounds (zero-area boxes, or boxes entirely outside the image) are skipped.

    Args:
        image (PIL.Image.Image): image in which we should blur specific regions
        detections (list): list of detections in the MD output format, see
            render_detection_bounding_boxes for more detail.
        blur_radius (int, optional): radius of blur kernel in pixels
    """

    img_width, img_height = image.size

    for d in detections:

        x_norm, y_norm, width_norm, height_norm = d['bbox']

        # Calculate absolute pixel coordinates
        x = int(x_norm * img_width)
        y = int(y_norm * img_height)
        width = int(width_norm * img_width)
        height = int(height_norm * img_height)

        # Calculate box boundaries, clipped to the image
        left = max(0, x)
        top = max(0, y)
        right = min(img_width, x + width)
        bottom = min(img_height, y + height)

        # Nothing to blur if the clipped region is empty or inverted; passing such
        # a box to crop() is an error at worst and a no-op at best
        if (right <= left) or (bottom <= top):
            continue

        # Crop the region, blur it, and paste it back
        region = image.crop((left, top, right, bottom))
        blurred_region = region.filter(ImageFilter.GaussianBlur(radius=blur_radius))
        image.paste(blurred_region, (left, top))
|
|
456
|
+
|
|
457
|
+
# ...for each detection
|
|
458
|
+
|
|
459
|
+
# ...def blur_detections(...)
|
|
460
|
+
|
|
461
|
+
|
|
462
|
+
def render_detection_bounding_boxes(detections,
|
|
463
|
+
image,
|
|
464
|
+
label_map='show_categories',
|
|
465
|
+
classification_label_map=None,
|
|
466
|
+
confidence_threshold=0.0,
|
|
467
|
+
thickness=DEFAULT_BOX_THICKNESS,
|
|
468
|
+
expansion=0,
|
|
469
|
+
classification_confidence_threshold=0.3,
|
|
470
|
+
max_classifications=3,
|
|
471
|
+
colormap=None,
|
|
472
|
+
textalign=TEXTALIGN_LEFT,
|
|
473
|
+
vtextalign=VTEXTALIGN_TOP,
|
|
474
|
+
label_font_size=DEFAULT_LABEL_FONT_SIZE,
|
|
475
|
+
custom_strings=None,
|
|
476
|
+
box_sort_order='confidence',
|
|
477
|
+
verbose=False):
|
|
478
|
+
"""
|
|
479
|
+
Renders bounding boxes (with labels and confidence values) on an image for all
|
|
480
|
+
detections above a threshold.
|
|
481
|
+
|
|
482
|
+
Renders classification labels if present.
|
|
483
|
+
|
|
484
|
+
[image] is modified in place.
|
|
485
|
+
|
|
486
|
+
Args:
|
|
487
|
+
detections (list): list of detections in the MD output format, for example:
|
|
488
|
+
|
|
489
|
+
.. code-block::none
|
|
490
|
+
|
|
491
|
+
[
|
|
492
|
+
{
|
|
493
|
+
"category": "2",
|
|
494
|
+
"conf": 0.996,
|
|
495
|
+
"bbox": [
|
|
496
|
+
0.0,
|
|
497
|
+
0.2762,
|
|
498
|
+
0.1234,
|
|
499
|
+
0.2458
|
|
500
|
+
]
|
|
501
|
+
}
|
|
502
|
+
]
|
|
503
|
+
|
|
504
|
+
...where the bbox coordinates are [x, y, box_width, box_height].
|
|
505
|
+
|
|
506
|
+
(0, 0) is the upper-left. Coordinates are normalized.
|
|
507
|
+
|
|
508
|
+
Supports classification results, in the standard format:
|
|
509
|
+
|
|
510
|
+
.. code-block::none
|
|
511
|
+
|
|
512
|
+
[
|
|
513
|
+
{
|
|
514
|
+
"category": "2",
|
|
515
|
+
"conf": 0.996,
|
|
516
|
+
"bbox": [
|
|
517
|
+
0.0,
|
|
518
|
+
0.2762,
|
|
519
|
+
0.1234,
|
|
520
|
+
0.2458
|
|
521
|
+
]
|
|
522
|
+
"classifications": [
|
|
523
|
+
["3", 0.901],
|
|
524
|
+
["1", 0.071],
|
|
525
|
+
["4", 0.025]
|
|
526
|
+
]
|
|
527
|
+
}
|
|
528
|
+
]
|
|
529
|
+
|
|
530
|
+
image (PIL.Image.Image): image on which we should render detections
|
|
531
|
+
label_map (dict, optional): optional, mapping the numeric label to a string name. The type of the
|
|
532
|
+
numeric label (typically strings) needs to be consistent with the keys in label_map; no casting is
|
|
533
|
+
carried out. If [label_map] is None, no labels are shown (not even numbers and confidence values).
|
|
534
|
+
If you want category numbers and confidence values without class labels, use the default value,
|
|
535
|
+
the string 'show_categories'.
|
|
536
|
+
classification_label_map (dict, optional): optional, mapping of the string class labels to the actual
|
|
537
|
+
class names. The type of the numeric label (typically strings) needs to be consistent with the keys
|
|
538
|
+
in label_map; no casting is carried out. If [label_map] is None, no labels are shown (not even numbers
|
|
539
|
+
and confidence values).
|
|
540
|
+
confidence_threshold (float or dict, optional): threshold above which boxes are rendered. Can also be a
|
|
541
|
+
dictionary mapping category IDs to thresholds.
|
|
542
|
+
thickness (int, optional): line thickness in pixels
|
|
543
|
+
expansion (int, optional): number of pixels to expand bounding boxes on each side
|
|
544
|
+
classification_confidence_threshold (float, optional): confidence above which classification results
|
|
545
|
+
are displayed
|
|
546
|
+
max_classifications (int, optional): maximum number of classification results rendered for one image
|
|
547
|
+
colormap (list, optional): list of color names, used to choose colors for categories by
|
|
548
|
+
indexing with the values in [classes]; defaults to a reasonable set of colors
|
|
549
|
+
textalign (int, optional): TEXTALIGN_LEFT, TEXTALIGN_CENTER, or TEXTALIGN_RIGHT
|
|
550
|
+
vtextalign (int, optional): VTEXTALIGN_TOP or VTEXTALIGN_BOTTOM
|
|
551
|
+
label_font_size (float, optional): font size for labels
|
|
552
|
+
custom_strings (list of str, optional): optional set of strings to append to detection labels, should
|
|
553
|
+
have the same length as [detections]. Appended before any classification labels.
|
|
554
|
+
box_sort_order (str, optional): sorting scheme for detection boxes, can be None, "confidence", or
|
|
555
|
+
"reverse_confidence". "confidence" puts the highest-confidence boxes on top.
|
|
556
|
+
verbose (bool, optional): enable additional debug output
|
|
557
|
+
"""
|
|
558
|
+
|
|
559
|
+
# Input validation
|
|
560
|
+
if (label_map is not None) and (isinstance(label_map,str)) and (label_map == 'show_categories'):
|
|
561
|
+
label_map = {}
|
|
562
|
+
|
|
563
|
+
if custom_strings is not None:
|
|
564
|
+
assert len(custom_strings) == len(detections), \
|
|
565
|
+
'{} custom strings provided for {} detections'.format(
|
|
566
|
+
len(custom_strings),len(detections))
|
|
567
|
+
|
|
568
|
+
display_boxes = []
|
|
569
|
+
|
|
570
|
+
# list of lists, one list of strings for each bounding box (to accommodate multiple labels)
|
|
571
|
+
display_strs = []
|
|
572
|
+
|
|
573
|
+
# for color selection
|
|
574
|
+
classes = []
|
|
575
|
+
|
|
576
|
+
if box_sort_order is not None:
|
|
577
|
+
|
|
578
|
+
if box_sort_order == 'confidence':
|
|
579
|
+
detections = sort_list_of_dicts_by_key(detections,k='conf',reverse=False)
|
|
580
|
+
elif box_sort_order == 'reverse_confidence':
|
|
581
|
+
detections = sort_list_of_dicts_by_key(detections,k='conf',reverse=True)
|
|
582
|
+
else:
|
|
583
|
+
raise ValueError('Unrecognized sorting scheme {}'.format(box_sort_order))
|
|
584
|
+
|
|
585
|
+
for i_detection,detection in enumerate(detections):
|
|
586
|
+
|
|
587
|
+
score = detection['conf']
|
|
588
|
+
|
|
589
|
+
if isinstance(confidence_threshold,dict):
|
|
590
|
+
rendering_threshold = confidence_threshold[detection['category']]
|
|
591
|
+
else:
|
|
592
|
+
rendering_threshold = confidence_threshold
|
|
593
|
+
|
|
594
|
+
# Always render objects with a confidence of "None", this is typically used
|
|
595
|
+
# for ground truth data.
|
|
596
|
+
if (score is None) or (rendering_threshold is None) or (score >= rendering_threshold):
|
|
597
|
+
|
|
598
|
+
x1, y1, w_box, h_box = detection['bbox']
|
|
599
|
+
display_boxes.append([y1, x1, y1 + h_box, x1 + w_box])
|
|
600
|
+
|
|
601
|
+
# The class index to use for coloring this box, which may be based on the detection
|
|
602
|
+
# category or on the most confident classification category.
|
|
603
|
+
clss = detection['category']
|
|
604
|
+
|
|
605
|
+
# This will be a list of strings that should be rendered above/below this box
|
|
606
|
+
displayed_label = []
|
|
607
|
+
|
|
608
|
+
if label_map is not None:
|
|
609
|
+
label = label_map[clss] if clss in label_map else clss
|
|
610
|
+
if score is not None:
|
|
611
|
+
displayed_label = ['{}: {}%'.format(label, round(100 * score))]
|
|
612
|
+
else:
|
|
613
|
+
displayed_label = ['{}'.format(label)]
|
|
614
|
+
else:
|
|
615
|
+
displayed_label = ['']
|
|
616
|
+
|
|
617
|
+
if custom_strings is not None:
|
|
618
|
+
custom_string = custom_strings[i_detection]
|
|
619
|
+
if custom_string is not None and len(custom_string) > 0:
|
|
620
|
+
assert len(displayed_label) == 1
|
|
621
|
+
displayed_label[0] += ' ' + custom_string
|
|
622
|
+
|
|
623
|
+
if ('classifications' in detection) and len(detection['classifications']) > 0:
|
|
624
|
+
|
|
625
|
+
classifications = detection['classifications']
|
|
626
|
+
|
|
627
|
+
if len(classifications) > max_classifications:
|
|
628
|
+
classifications = classifications[0:max_classifications]
|
|
629
|
+
|
|
630
|
+
max_classification_category = 0
|
|
631
|
+
max_classification_conf = -100
|
|
632
|
+
|
|
633
|
+
for classification in classifications:
|
|
634
|
+
|
|
635
|
+
classification_conf = classification[1]
|
|
636
|
+
if classification_conf is None or \
|
|
637
|
+
classification_conf < classification_confidence_threshold:
|
|
638
|
+
continue
|
|
639
|
+
|
|
640
|
+
class_key = classification[0]
|
|
641
|
+
|
|
642
|
+
# Is this the most confident classification for this detection?
|
|
643
|
+
if classification_conf > max_classification_conf:
|
|
644
|
+
max_classification_conf = classification_conf
|
|
645
|
+
max_classification_category = int(class_key)
|
|
646
|
+
|
|
647
|
+
if (classification_label_map is not None) and (class_key in classification_label_map):
|
|
648
|
+
class_name = classification_label_map[class_key]
|
|
649
|
+
else:
|
|
650
|
+
class_name = class_key
|
|
651
|
+
if classification_conf is not None:
|
|
652
|
+
displayed_label += ['{}: {:5.1%}'.format(class_name.lower(), classification_conf)]
|
|
653
|
+
else:
|
|
654
|
+
displayed_label += ['{}'.format(class_name.lower())]
|
|
655
|
+
|
|
656
|
+
# ...for each classification
|
|
657
|
+
|
|
658
|
+
# To avoid duplicate colors with detection-only visualization, offset
|
|
659
|
+
# the classification class index by the number of detection classes
|
|
660
|
+
clss = annotation_constants.NUM_DETECTOR_CATEGORIES + max_classification_category
|
|
661
|
+
|
|
662
|
+
# ...if we have classification results
|
|
663
|
+
|
|
664
|
+
# display_strs is a list of labels for each box
|
|
665
|
+
display_strs.append(displayed_label)
|
|
666
|
+
classes.append(clss)
|
|
667
|
+
|
|
668
|
+
# ...if the confidence of this detection is above threshold
|
|
669
|
+
|
|
670
|
+
# ...for each detection
|
|
671
|
+
|
|
672
|
+
display_boxes = np.array(display_boxes)
|
|
673
|
+
|
|
674
|
+
if verbose:
|
|
675
|
+
print('Rendering {} of {} detections'.format(len(display_boxes),len(detections)))
|
|
676
|
+
|
|
677
|
+
draw_bounding_boxes_on_image(image,
|
|
678
|
+
display_boxes,
|
|
679
|
+
classes,
|
|
680
|
+
display_strs=display_strs,
|
|
681
|
+
thickness=thickness,
|
|
682
|
+
expansion=expansion,
|
|
683
|
+
colormap=colormap,
|
|
684
|
+
textalign=textalign,
|
|
685
|
+
vtextalign=vtextalign,
|
|
686
|
+
label_font_size=label_font_size)
|
|
687
|
+
|
|
688
|
+
# ...render_detection_bounding_boxes(...)
|
|
689
|
+
|
|
690
|
+
|
|
691
|
+
def draw_bounding_boxes_on_image(image,
                                 boxes,
                                 classes,
                                 thickness=DEFAULT_BOX_THICKNESS,
                                 expansion=0,
                                 display_strs=None,
                                 colormap=None,
                                 textalign=TEXTALIGN_LEFT,
                                 vtextalign=VTEXTALIGN_TOP,
                                 text_rotation=None,
                                 label_font_size=DEFAULT_LABEL_FONT_SIZE):
    """
    Renders a set of boxes onto [image] by calling draw_bounding_box_on_image() once
    per box. The image is modified in place; nothing is returned.

    If [boxes] is not a two-dimensional array with exactly four columns (this includes
    the one-dimensional array produced from an empty list), nothing is drawn.

    Args:
        image (PIL.Image): the image on which we should draw boxes
        boxes (np.array): a two-dimensional numpy array of size [N, 4], where N is the
            number of boxes, and each row is (ymin, xmin, ymax, xmax). Coordinates should be
            normalized to image height/width.
        classes (list): a list of ints or string-formatted ints, one per box, used only
            to choose a color for each box. Should have the same length as [boxes].
        thickness (int, optional): line thickness in pixels
        expansion (int, optional): number of pixels to expand bounding boxes on each side
        display_strs (list, optional): list of lists of strings, where the outer list has
            the same length as [boxes]; each inner list holds the label lines for one box
            (typically categories and/or confidence values)
        colormap (list, optional): list of color names, used to choose colors for categories by
            indexing with the values in [classes]; defaults to a reasonable set of colors
        textalign (int, optional): TEXTALIGN_LEFT, TEXTALIGN_CENTER, or TEXTALIGN_RIGHT
        vtextalign (int, optional): VTEXTALIGN_TOP or VTEXTALIGN_BOTTOM
        text_rotation (float, optional): rotation to apply to text
        label_font_size (float, optional): font size for labels
    """

    shape = boxes.shape

    # Bail out unless we were handed an [N, 4] array
    if (not shape) or (len(shape) != 2) or (shape[1] != 4):
        return

    n_boxes = shape[0]

    for i_box in range(n_boxes):

        # Labels are optional; a None/empty [display_strs] means no text for any box
        box_labels = display_strs[i_box] if display_strs else None

        draw_bounding_box_on_image(image,
                                   ymin=boxes[i_box, 0],
                                   xmin=boxes[i_box, 1],
                                   ymax=boxes[i_box, 2],
                                   xmax=boxes[i_box, 3],
                                   clss=classes[i_box],
                                   thickness=thickness,
                                   expansion=expansion,
                                   display_str_list=box_labels,
                                   colormap=colormap,
                                   textalign=textalign,
                                   vtextalign=vtextalign,
                                   text_rotation=text_rotation,
                                   label_font_size=label_font_size)
|
|
745
|
+
|
|
746
|
+
# ...draw_bounding_boxes_on_image(...)
|
|
747
|
+
|
|
748
|
+
|
|
749
|
+
def get_text_size(font,s):
    """
    Estimates the width and height of the string [s] when rendered in the font [font].

    The preferred source is font.getbbox(); the right and bottom edges of the returned
    box are reported as the width and height. If that call fails for any reason
    (e.g. the font object has no getbbox() method), font.getsize() is used instead.

    Args:
        font (PIL.ImageFont): the font whose size we should query
        s (str): the string whose size we should query

    Returns:
        tuple: (w,h), in pixels
    """

    # With Pillow 9, this was simply:
    #
    # w,h = font.getsize(s)
    #
    # The "obvious" Pillow 10 equivalent would be right-left and bottom-top from
    # getbbox(), but using the right and bottom edges directly gives results that
    # are closest to what Pillow 9 produced, so that's what we do here.
    try:
        _, _, text_w, text_h = font.getbbox(s)
    except Exception:
        text_w, text_h = font.getsize(s)

    return text_w, text_h
|
|
777
|
+
|
|
778
|
+
|
|
779
|
+
def draw_bounding_box_on_image(image,
                               ymin,
                               xmin,
                               ymax,
                               xmax,
                               clss=None,
                               thickness=DEFAULT_BOX_THICKNESS,
                               expansion=0,
                               display_str_list=None,
                               use_normalized_coordinates=True,
                               label_font_size=DEFAULT_LABEL_FONT_SIZE,
                               colormap=None,
                               textalign=TEXTALIGN_LEFT,
                               vtextalign=VTEXTALIGN_TOP,
                               text_rotation=None):
    """
    Adds a single bounding box (and, optionally, its text labels) to an image.
    Modifies the image in place; nothing is returned.

    Bounding box coordinates can be specified in either absolute (pixel) or
    normalized coordinates by setting the use_normalized_coordinates argument.

    Each non-empty string in display_str_list is rendered on its own line, in black
    text on a rectangle filled with the box color (which is chosen from [colormap]
    based on [clss]). With VTEXTALIGN_TOP, labels are stacked above the box, unless
    they would extend past the top of the image, in which case they are stacked
    below the box. With VTEXTALIGN_BOTTOM, labels are stacked below the box, falling
    back to the top of the box when the overflow check in the code triggers.

    Labels are drawn with 'arial.ttf' at [label_font_size]; if that font can't be
    loaded (OSError), PIL's default font is used instead.

    Adapted from:

    https://github.com/tensorflow/models/blob/master/research/object_detection/utils/visualization_utils.py

    Args:
        image (PIL.Image.Image): the image on which we should draw a box
        ymin (float): ymin of bounding box
        xmin (float): xmin of bounding box
        ymax (float): ymax of bounding box
        xmax (float): xmax of bounding box
        clss (int, optional): the class index of the object in this bounding box, used for choosing
            a color; should be either an integer or a string-formatted integer. If None, the
            color at index 1 of [colormap] is used.
        thickness (int, optional): line thickness in pixels
        expansion (int, optional): number of pixels to expand bounding boxes on each side; when
            this is greater than zero, the expanded box is also clipped to the image
        display_str_list (list, optional): list of strings to display above the box (each to be shown on its
            own line)
        use_normalized_coordinates (bool, optional): if True (default), treat coordinates
            ymin, xmin, ymax, xmax as relative to the image, otherwise coordinates as absolute pixel values
        label_font_size (float, optional): font size
        colormap (list, optional): list of color names, used to choose a color for this box by
            indexing with [clss] (modulo the length of the list); defaults to DEFAULT_COLORS
        textalign (int, optional): TEXTALIGN_LEFT, TEXTALIGN_CENTER, or TEXTALIGN_RIGHT
        vtextalign (int, optional): VTEXTALIGN_TOP or VTEXTALIGN_BOTTOM
        text_rotation (float, optional): rotation to apply to text; only None, 0, and -90
            are accepted (any other value trips an assertion)
    """

    if colormap is None:
        colormap = DEFAULT_COLORS

    if display_str_list is None:
        display_str_list = []

    if clss is None:
        # Default to the MegaDetector animal class ID (1)
        color = colormap[1]
    else:
        # Wrap around, so class indices larger than the colormap still get a color
        color = colormap[int(clss) % len(colormap)]

    draw = ImageDraw.Draw(image)
    im_width, im_height = image.size

    # Convert to pixel coordinates if necessary
    if use_normalized_coordinates:
        (left, right, top, bottom) = (xmin * im_width, xmax * im_width,
                                      ymin * im_height, ymax * im_height)
    else:
        (left, right, top, bottom) = (xmin, xmax, ymin, ymax)

    if expansion > 0:

        left -= expansion
        right += expansion
        top -= expansion
        bottom += expansion

        # Deliberately trimming to the width of the image only in the case where
        # box expansion is turned on.  There's not an obvious correct behavior here,
        # but the thinking is that if the caller provided an out-of-range bounding
        # box, they meant to do that, but at least in the eyes of the person writing
        # this comment, if you expand a box for visualization reasons, you don't want
        # to end up with part of a box.
        #
        # A slightly more sophisticated approach might check whether it was in fact the
        # expansion that made this box larger than the image, but this is the case 99.999%
        # of the time here, so that doesn't seem necessary.
        left = max(left,0); right = max(right,0)
        top = max(top,0); bottom = max(bottom,0)

        left = min(left,im_width-1); right = min(right,im_width-1)
        top = min(top,im_height-1); bottom = min(bottom,im_height-1)

    # ...if we need to expand boxes

    # Draw the box outline as a closed polyline
    draw.line([(left, top), (left, bottom), (right, bottom),
               (right, top), (left, top)], width=thickness, fill=color)

    # NOTE(review): display_str_list was replaced with [] above when it was None, so this
    # condition is always true here; the font is loaded even when there is nothing to render.
    if display_str_list is not None:

        try:
            font = ImageFont.truetype('arial.ttf', label_font_size)
        except OSError:
            font = ImageFont.load_default()

        display_str_heights = [get_text_size(font,ds)[1] for ds in display_str_list]

        # Each display_str has a top and bottom margin of 0.05x.
        total_display_str_height = (1 + 2 * 0.05) * sum(display_str_heights)

        # Reverse list and print from bottom to top
        for i_str,display_str in enumerate(display_str_list[::-1]):

            # Skip empty strings (note that i_str still advances for skipped strings)
            if len(display_str) == 0:
                continue

            text_width, text_height = get_text_size(font,display_str)
            margin = int(np.ceil(0.05 * text_height))

            if text_rotation is not None and text_rotation != 0:

                assert text_rotation == -90, \
                    'Only -90-degree text rotation is supported'

                # Render the label to a temporary image, rotate that image, then paste
                # it onto the target image.
                image_tmp = Image.new('RGB',(text_width+2*margin,text_height+2*margin))
                image_tmp_draw = ImageDraw.Draw(image_tmp)
                image_tmp_draw.rectangle([0,0,text_width+2*margin,text_height+2*margin],fill=color)
                image_tmp_draw.text( (margin,margin), display_str, font=font, fill='black')
                rotated_text = image_tmp.rotate(text_rotation,expand=1)

                # TEXTALIGN_CENTER is treated like TEXTALIGN_LEFT for rotated text
                if textalign == TEXTALIGN_RIGHT:
                    text_left = right
                else:
                    text_left = left

                # Successive labels are offset horizontally, one text height per label
                text_left = int(text_left + (text_height) * i_str)

                if vtextalign == VTEXTALIGN_BOTTOM:
                    text_top = bottom - text_width
                else:
                    text_top = top
                text_left = int(text_left)
                text_top = int(text_top)

                image.paste(rotated_text,[text_left,text_top])

            else:

                # If the total height of the display strings added to the top of the bounding
                # box exceeds the top of the image, stack the strings below the bounding box
                # instead of above, and vice-versa if we're bottom-aligning.
                #
                # If the text just doesn't fit outside the box, we don't try anything fancy,
                # it will just appear outside the image.
                if vtextalign == VTEXTALIGN_TOP:
                    text_bottom = top
                    if (text_bottom - total_display_str_height) < 0:
                        text_bottom = bottom + total_display_str_height
                else:
                    assert vtextalign == VTEXTALIGN_BOTTOM, \
                        'Unrecognized vertical text alignment {}'.format(vtextalign)
                    text_bottom = bottom + total_display_str_height
                    # NOTE(review): text_bottom already includes total_display_str_height at
                    # this point, so this test adds the label height twice; it looks like the
                    # intended check may be (text_bottom > im_height) - confirm before changing.
                    if (text_bottom + total_display_str_height) > im_height:
                        text_bottom = top

                # Move up by one label slot (text height plus both margins) for each
                # string that precedes this one in the reversed list
                text_bottom = int(text_bottom) - i_str * (int(text_height + (2 * margin)))

                # Left-aligned unless one of the other alignments is requested
                text_left = left

                if textalign == TEXTALIGN_RIGHT:
                    text_left = right - text_width
                elif textalign == TEXTALIGN_CENTER:
                    text_left = ((right + left) / 2.0) - (text_width / 2.0)
                text_left = int(text_left)

                # Filled background rectangle for this label...
                draw.rectangle(
                    [(text_left, (text_bottom - text_height) - (2 * margin)),
                     (text_left + text_width, text_bottom)],
                    fill=color)

                # ...and the label text itself, inset by the margin
                draw.text(
                    (text_left + margin, text_bottom - text_height - margin),
                    display_str,
                    fill='black',
                    font=font)
|
|
966
|
+
|
|
967
|
+
# ...if we're rotating text
|
|
968
|
+
|
|
969
|
+
# ...if we're rendering text
|
|
970
|
+
|
|
971
|
+
# ...def draw_bounding_box_on_image(...)
|
|
972
|
+
|
|
973
|
+
|
|
974
|
+
def render_megadb_bounding_boxes(boxes_info, image):
    """
    Draws boxes stored in the (mostly-deprecated) MegaDB format onto [image], labeling
    each box with its category name. The image is modified in place.

    A MegaDB box looks like:

    .. code-block::none

        {
            "category": "animal",
            "bbox": [
                0.739,
                0.448,
                0.187,
                0.198
            ]
        }

    Args:
        boxes_info (list): list of dicts, each dict represents a single detection
            where bbox coordinates are normalized [x_min, y_min, width, height]
        image (PIL.Image.Image): image to modify

    :meta private:
    """

    # One entry per box in each of these lists
    box_coords = []
    box_labels = []
    color_indices = []

    for box_info in boxes_info:

        # Convert [x, y, w, h] to the [ymin, xmin, ymax, xmax] layout expected
        # by draw_bounding_boxes_on_image()
        left, top, box_w, box_h = box_info['bbox']
        bottom = top + box_h
        right = left + box_w
        box_coords.append([top, left, bottom, right])

        # The category name is the (only) label line for this box
        box_labels.append([box_info['category']])

        # Map the category name to an integer ID, used for color selection
        color_indices.append(
            annotation_constants.detector_bbox_category_name_to_id[box_info['category']])

    box_coords = np.array(box_coords)
    draw_bounding_boxes_on_image(image, box_coords, color_indices, display_strs=box_labels)
|
|
1013
|
+
|
|
1014
|
+
# ...def render_megadb_bounding_boxes(...)
|
|
1015
|
+
|
|
1016
|
+
|
|
1017
|
+
def render_db_bounding_boxes(boxes,
                             classes,
                             image,
                             original_size=None,
                             label_map=None,
                             thickness=DEFAULT_BOX_THICKNESS,
                             expansion=0,
                             colormap=None,
                             textalign=TEXTALIGN_LEFT,
                             vtextalign=VTEXTALIGN_TOP,
                             text_rotation=None,
                             label_font_size=DEFAULT_LABEL_FONT_SIZE,
                             tags=None,
                             boxes_are_normalized=False):
    """
    Draws labeled bounding boxes on an image, modifying the image in place. This is a
    wrapper around draw_bounding_boxes_on_image() that converts (x,y,w,h) boxes to the
    normalized (ymin,xmin,ymax,xmax) layout that function expects, and builds one label
    string per box.

    Unless [boxes_are_normalized] is True, boxes are assumed to be in absolute (pixel)
    coordinates, typically because they come from COCO camera traps .json files. Pixel
    coordinates are normalized by [original_size] if it is provided, otherwise by the
    size of [image]; this allows the caller to render onto a resized image.

    Args:
        boxes (list): list of length-4 tuples, formatted as (x,y,w,h) (in pixels, unless
            [boxes_are_normalized] is True)
        classes (list): list of ints (or string-formatted ints), used to choose labels (either
            by literally rendering the class labels, or by indexing into [label_map]) and colors
        image (PIL.Image.Image): image object to modify
        original_size (tuple, optional): (width, height) to which pixel coordinates in [boxes]
            refer; if None, the size of [image] is used
        label_map (dict, optional): int --> str dictionary, typically mapping category IDs to
            species labels; if None or empty, category labels are rendered verbatim (typically
            as numbers)
        thickness (int, optional): line width
        expansion (int, optional): number of pixels to expand bounding boxes on each side
        colormap (list, optional): list of color names, used to choose colors for categories by
            indexing with the values in [classes]; defaults to a reasonable set of colors
        textalign (int, optional): TEXTALIGN_LEFT, TEXTALIGN_CENTER, or TEXTALIGN_RIGHT
        vtextalign (int, optional): VTEXTALIGN_TOP or VTEXTALIGN_BOTTOM
        text_rotation (float, optional): rotation to apply to text
        label_font_size (float, optional): font size for labels
        tags (list, optional): list of strings of length len(boxes) that should be appended
            after each class name (e.g. to show scores); None or empty entries are skipped
        boxes_are_normalized (bool, optional): whether boxes have already been normalized
    """

    normalized_boxes = []
    labels = []

    # Pixel coordinates are interpreted relative to the original size, if we have one
    img_width, img_height = (original_size if original_size is not None else image.size)

    for i_box, box in enumerate(boxes):

        category = classes[i_box]

        box_x, box_y, box_w, box_h = box[0:4]

        if boxes_are_normalized:

            # Already normalized, just convert from (x,y,w,h) to corners
            xmin = box_x
            xmax = box_x + box_w
            ymin = box_y
            ymax = box_y + box_h

        else:

            # Normalize and convert from (x,y,w,h) to corners
            ymin = box_y / img_height
            ymax = ymin + box_h / img_height

            xmin = box_x / img_width
            xmax = xmin + box_w / img_width

        normalized_boxes.append([ymin, xmin, ymax, xmax])

        # Translate the category to a name if we can, otherwise render it verbatim
        if label_map:
            category = label_map[int(category)]

        label = str(category)

        # Append this box's tag, if it has a non-empty one
        if tags is not None:
            tag = tags[i_box]
            if tag is not None and len(tag) > 0:
                label += ' ' + tag

        # Labels are lists of strings (one string per line); we only ever have one line here
        labels.append([label])

    # ...for each box

    normalized_boxes = np.array(normalized_boxes)

    draw_bounding_boxes_on_image(image,
                                 normalized_boxes,
                                 classes,
                                 display_strs=labels,
                                 thickness=thickness,
                                 expansion=expansion,
                                 colormap=colormap,
                                 textalign=textalign,
                                 vtextalign=vtextalign,
                                 text_rotation=text_rotation,
                                 label_font_size=label_font_size)
|
|
1126
|
+
|
|
1127
|
+
# ...def render_db_bounding_boxes(...)
|
|
1128
|
+
|
|
1129
|
+
|
|
1130
|
+
def draw_bounding_boxes_on_file(input_file,
                                output_file,
                                detections,
                                confidence_threshold=0.0,
                                detector_label_map=DEFAULT_DETECTOR_LABEL_MAP,
                                thickness=DEFAULT_BOX_THICKNESS,
                                expansion=0,
                                colormap=None,
                                label_font_size=DEFAULT_LABEL_FONT_SIZE,
                                custom_strings=None,
                                target_size=None,
                                ignore_exif_rotation=False):
    """
    Loads an image, optionally resizes it, renders MD-formatted detections on it, and
    (if [output_file] is not None) writes the result to a new image file.

    Args:
        input_file (str): filename or URL to load
        output_file (str): filename to which we should write the rendered image; if None,
            nothing is written
        detections (list): a list of dictionaries with keys 'conf', 'bbox', and 'category';
            boxes are length-four arrays formatted as [x,y,w,h], normalized,
            upper-left origin (this is the standard MD detection format). 'category' is a string-int.
        confidence_threshold (float, optional): only render detections with confidence at or
            above this threshold
        detector_label_map (dict, optional): a dict mapping category IDs to strings. If this
            is None, no confidence values or identifiers are shown. If this is {}, just category
            indices and confidence values are shown.
        thickness (int, optional): line width in pixels for box rendering
        expansion (int, optional): box expansion in pixels
        colormap (list, optional): list of color names, used to choose colors for categories;
            defaults to a reasonable set of colors
        label_font_size (float, optional): label font size
        custom_strings (list, optional): set of strings to append to detection labels, should have the
            same length as [detections]. Appended before any classification labels.
        target_size (tuple, optional): tuple of (target_width,target_height), passed to
            resize_image(); see resize_image() for documentation. If None, the image is
            not resized.
        ignore_exif_rotation (bool, optional): don't rotate the loaded pixels,
            even if we are loading a JPEG and that JPEG says it should be rotated.

    Returns:
        PIL.Image.Image: loaded and modified image
    """

    rendered_image = open_image(input_file, ignore_exif_rotation=ignore_exif_rotation)

    # Resize before rendering, so boxes and labels are drawn at the output resolution
    if target_size is not None:
        target_width = target_size[0]
        target_height = target_size[1]
        rendered_image = resize_image(rendered_image,target_width,target_height)

    # Everything other than the detections and the image is passed through by name
    rendering_options = dict(label_map=detector_label_map,
                             confidence_threshold=confidence_threshold,
                             thickness=thickness,
                             expansion=expansion,
                             colormap=colormap,
                             custom_strings=custom_strings,
                             label_font_size=label_font_size)

    render_detection_bounding_boxes(detections,
                                    rendered_image,
                                    **rendering_options)

    # Writing to disk is optional
    if output_file is not None:
        rendered_image.save(output_file)

    return rendered_image
|
|
1193
|
+
|
|
1194
|
+
|
|
1195
|
+
def draw_db_boxes_on_file(input_file,
                          output_file,
                          boxes,
                          classes=None,
                          label_map=None,
                          thickness=DEFAULT_BOX_THICKNESS,
                          expansion=0,
                          ignore_exif_rotation=False):
    """
    Render COCO-formatted bounding boxes (in absolute coordinates) on an image loaded from file,
    optionally writing the results to a new image file.

    Args:
        input_file (str): image file to read
        output_file (str): image file to write; if None, the rendered image is returned
            without being written to disk (consistent with draw_bounding_boxes_on_file)
        boxes (list): list of length-4 tuples, formatted as (x,y,w,h) (in pixels)
        classes (list, optional): list of ints (or string-formatted ints), used to choose
            labels (either by literally rendering the class labels, or by indexing into [label_map]);
            if None, every box is assigned class 0
        label_map (dict, optional): int --> str dictionary, typically mapping category IDs to
            species labels; if None, category labels are rendered verbatim (typically as numbers)
        thickness (int, optional): line width
        expansion (int, optional): number of pixels to expand bounding boxes on each side
        ignore_exif_rotation (bool, optional): don't rotate the loaded pixels,
            even if we are loading a JPEG and that JPEG says it should be rotated

    Returns:
        PIL.Image.Image: the loaded and modified image
    """

    image = open_image(input_file, ignore_exif_rotation=ignore_exif_rotation)

    # With no class information, treat every box as class 0, so all boxes share a color
    if classes is None:
        classes = [0] * len(boxes)

    # Boxes are in pixel coordinates relative to the image we just loaded, so no
    # original size is supplied.
    render_db_bounding_boxes(boxes,
                             classes,
                             image,
                             original_size=None,
                             label_map=label_map,
                             thickness=thickness,
                             expansion=expansion)

    # Allow callers to render without writing a file, as draw_bounding_boxes_on_file does
    if output_file is not None:
        image.save(output_file)

    return image
|
|
1241
|
+
|
|
1242
|
+
# ...def draw_db_boxes_on_file(...)
|
|
1243
|
+
|
|
1244
|
+
|
|
1245
|
+
def gray_scale_fraction(image,crop_size=(0.1,0.1)):
    """
    Computes the fraction of the pixels in [image] that appear to be grayscale (R==G==B),
    useful for approximating whether this is a night-time image when flash information is not
    available in EXIF data (or for video frames, where this information is often not available
    in structured metadata at all).

    Args:
        image (str or PIL.Image.Image): image to analyze, or the name of an image file, which
            is opened with PIL's Image.open()
        crop_size (tuple of floats, optional): a 2-element list/tuple, representing the fraction of
            the image to crop at the top and bottom, respectively, before analyzing (to minimize
            the possibility of including color elements in the image overlay)

    Returns:
        float: the fraction of pixels in [image] that appear to be grayscale (R==G==B); always
        1.0 for images with fewer than three bands
    """

    if isinstance(image,str):
        image = Image.open(image)

    if image.mode == 'L':
        return 1.0

    # Images with fewer than three bands (e.g. 'LA') have no three channels to compare, so the
    # channel lookups below would fail; report them as fully grayscale instead.
    if len(image.getbands()) < 3:
        return 1.0

    # Crop if necessary
    if crop_size[0] > 0 or crop_size[1] > 0:

        assert (crop_size[0] + crop_size[1]) < 1.0, \
            'Illegal crop size: {}'.format(str(crop_size))

        top_crop_pixels = int(image.height * crop_size[0])
        bottom_crop_pixels = int(image.height * crop_size[1])

        # Remove rows from the top and the bottom in a single crop; the full width is kept
        image = image.crop((0,
                            top_crop_pixels,
                            image.width,
                            image.height - bottom_crop_pixels))

    # It doesn't matter if these are actually R/G/B, they're just names
    r = np.array(image.getchannel(0))
    g = np.array(image.getchannel(1))
    b = np.array(image.getchannel(2))

    # A pixel counts as gray when all three channels agree
    gray_pixels = np.logical_and(r == g, r == b)
    n_pixels = gray_pixels.size
    n_gray_pixels = gray_pixels.sum()

    return n_gray_pixels / n_pixels

# ...def gray_scale_fraction(...)
|
|
1319
|
+
|
|
1320
|
+
|
|
1321
|
+
def _resize_relative_image(fn_relative,
|
|
1322
|
+
input_folder,
|
|
1323
|
+
output_folder,
|
|
1324
|
+
target_width,
|
|
1325
|
+
target_height,
|
|
1326
|
+
no_enlarge_width,
|
|
1327
|
+
verbose,
|
|
1328
|
+
quality,
|
|
1329
|
+
overwrite=True):
|
|
1330
|
+
"""
|
|
1331
|
+
Internal function for resizing an image from one folder to another,
|
|
1332
|
+
maintaining relative path.
|
|
1333
|
+
"""
|
|
1334
|
+
|
|
1335
|
+
input_fn_abs = os.path.join(input_folder,fn_relative)
|
|
1336
|
+
output_fn_abs = os.path.join(output_folder,fn_relative)
|
|
1337
|
+
|
|
1338
|
+
if (not overwrite) and (os.path.isfile(output_fn_abs)):
|
|
1339
|
+
status = 'skipped'
|
|
1340
|
+
error = None
|
|
1341
|
+
return {'fn_relative':fn_relative,'status':status,'error':error}
|
|
1342
|
+
|
|
1343
|
+
os.makedirs(os.path.dirname(output_fn_abs),exist_ok=True)
|
|
1344
|
+
try:
|
|
1345
|
+
_ = resize_image(input_fn_abs,
|
|
1346
|
+
output_file=output_fn_abs,
|
|
1347
|
+
target_width=target_width,
|
|
1348
|
+
target_height=target_height,
|
|
1349
|
+
no_enlarge_width=no_enlarge_width,
|
|
1350
|
+
verbose=verbose,
|
|
1351
|
+
quality=quality)
|
|
1352
|
+
status = 'success'
|
|
1353
|
+
error = None
|
|
1354
|
+
except Exception as e:
|
|
1355
|
+
if verbose:
|
|
1356
|
+
print('Error resizing {}: {}'.format(fn_relative,str(e)))
|
|
1357
|
+
status = 'error'
|
|
1358
|
+
error = str(e)
|
|
1359
|
+
|
|
1360
|
+
return {'fn_relative':fn_relative,'status':status,'error':error}
|
|
1361
|
+
|
|
1362
|
+
# ...def _resize_relative_image(...)
|
|
1363
|
+
|
|
1364
|
+
|
|
1365
|
+
def _resize_absolute_image(input_output_files,
|
|
1366
|
+
target_width,
|
|
1367
|
+
target_height,
|
|
1368
|
+
no_enlarge_width,
|
|
1369
|
+
verbose,
|
|
1370
|
+
quality):
|
|
1371
|
+
"""
|
|
1372
|
+
Internal wrapper for resize_image used in the context of a batch resize operation.
|
|
1373
|
+
"""
|
|
1374
|
+
|
|
1375
|
+
input_fn_abs = input_output_files[0]
|
|
1376
|
+
output_fn_abs = input_output_files[1]
|
|
1377
|
+
os.makedirs(os.path.dirname(output_fn_abs),exist_ok=True)
|
|
1378
|
+
try:
|
|
1379
|
+
_ = resize_image(input_fn_abs,
|
|
1380
|
+
output_file=output_fn_abs,
|
|
1381
|
+
target_width=target_width,
|
|
1382
|
+
target_height=target_height,
|
|
1383
|
+
no_enlarge_width=no_enlarge_width,
|
|
1384
|
+
verbose=verbose,
|
|
1385
|
+
quality=quality)
|
|
1386
|
+
status = 'success'
|
|
1387
|
+
error = None
|
|
1388
|
+
except Exception as e:
|
|
1389
|
+
if verbose:
|
|
1390
|
+
print('Error resizing {}: {}'.format(input_fn_abs,str(e)))
|
|
1391
|
+
status = 'error'
|
|
1392
|
+
error = str(e)
|
|
1393
|
+
|
|
1394
|
+
return {'input_fn':input_fn_abs,
|
|
1395
|
+
'output_fn':output_fn_abs,
|
|
1396
|
+
'status':status,
|
|
1397
|
+
'error':error}
|
|
1398
|
+
|
|
1399
|
+
# ..._resize_absolute_image(...)
|
|
1400
|
+
|
|
1401
|
+
|
|
1402
|
+
def resize_images(input_file_to_output_file,
                  target_width=-1,
                  target_height=-1,
                  no_enlarge_width=False,
                  verbose=False,
                  quality='keep',
                  pool_type='process',
                  n_workers=10):
    """
    Resizes all images in the dictionary [input_file_to_output_file].

    TODO: This is a little more redundant with resize_image_folder than I would like;
    refactor resize_image_folder to call resize_images.  Not doing that yet because
    at the time I'm writing this comment, a lot of code depends on resize_image_folder
    and I don't want to rock the boat yet.

    Args:
        input_file_to_output_file (dict): dict mapping images that exist to the locations
            where the resized versions should be written
        target_width (int, optional): width to which we should resize this image, or -1
            to let target_height determine the size
        target_height (int, optional): height to which we should resize this image, or -1
            to let target_width determine the size
        no_enlarge_width (bool, optional): if [no_enlarge_width] is True, and
            [target width] is larger than the original image width, does not modify the image,
            but will write to output_file if supplied
        verbose (bool, optional): enable additional debug output
        quality (str or int, optional): passed to resize_image; see exif_preserving_save
            for more detail
        pool_type (str, optional): whether to use processes ('process') or threads ('thread') for
            parallelization; ignored if n_workers <= 1
        n_workers (int, optional): number of workers to use for parallel resizing; set to <=1
            to disable parallelization

    Returns:
        list: a list of dicts with keys 'input_fn', 'output_fn', 'status', and 'error'.
        'status' will be 'success' or 'error'; 'error' will be None for successful cases,
        otherwise will contain the image-specific error.
    """

    assert pool_type in ('process','thread'), 'Illegal pool type {}'.format(pool_type)

    # Reformat input files as (input,output) tuples
    input_output_file_pairs = list(input_file_to_output_file.items())

    # Nothing to resize, don't bother starting a worker pool
    if len(input_output_file_pairs) == 0:
        return []

    # The pool constructors reject worker counts below 1, so anything <= 1 runs serially
    if n_workers <= 1:

        results = []
        for i_o_file_pair in tqdm(input_output_file_pairs):
            results.append(_resize_absolute_image(i_o_file_pair,
                                                  target_width=target_width,
                                                  target_height=target_height,
                                                  no_enlarge_width=no_enlarge_width,
                                                  verbose=verbose,
                                                  quality=quality))

    else:

        pool = None

        try:

            if pool_type == 'thread':
                pool = ThreadPool(n_workers)
                poolstring = 'threads'
            else:
                assert pool_type == 'process'
                pool = Pool(n_workers)
                poolstring = 'processes'

            if verbose:
                print('Starting resizing pool with {} {}'.format(n_workers,poolstring))

            p = partial(_resize_absolute_image,
                        target_width=target_width,
                        target_height=target_height,
                        no_enlarge_width=no_enlarge_width,
                        verbose=verbose,
                        quality=quality)

            results = list(tqdm(pool.imap(p, input_output_file_pairs),
                                total=len(input_output_file_pairs)))

        finally:

            # Always release the workers, even if resizing raised
            if pool is not None:
                pool.close()
                pool.join()
                print('Pool closed and joined for image resizing')

    return results

# ...def resize_images(...)
|
|
1494
|
+
|
|
1495
|
+
|
|
1496
|
+
def resize_image_folder(input_folder,
                        output_folder=None,
                        target_width=-1,
                        target_height=-1,
                        no_enlarge_width=False,
                        verbose=False,
                        quality='keep',
                        pool_type='process',
                        n_workers=10,
                        recursive=True,
                        image_files_relative=None,
                        overwrite=True):
    """
    Resize all images in a folder (defaults to recursive).

    Defaults to in-place resizing (output_folder is optional).

    Args:
        input_folder (str): folder in which we should find images to resize
        output_folder (str, optional): folder in which we should write resized images.  If
            None, resizes images in place.  Otherwise, maintains relative paths in the target
            folder.
        target_width (int, optional): width to which we should resize this image, or -1
            to let target_height determine the size
        target_height (int, optional): height to which we should resize this image, or -1
            to let target_width determine the size
        no_enlarge_width (bool, optional): if [no_enlarge_width] is True, and
            [target width] is larger than the original image width, does not modify the image,
            but will write to output_file if supplied
        verbose (bool, optional): enable additional debug output
        quality (str or int, optional): passed to resize_image; see exif_preserving_save
            for more detail
        pool_type (str, optional): whether to use processes ('process') or threads ('thread') for
            parallelization; ignored if n_workers <= 1
        n_workers (int, optional): number of workers to use for parallel resizing; set to <=1
            to disable parallelization
        recursive (bool, optional): whether to search [input_folder] recursively for images.
        image_files_relative (list, optional): if not None, only these paths (relative to
            [input_folder]) are resized, and the folder is not enumerated
        overwrite (bool, optional): whether to overwrite existing target images

    Returns:
        list: a list of dicts with keys 'fn_relative', 'status', and 'error'.
        'status' will be 'success', 'skipped', or 'error'; 'error' will be None unless
        'status' is 'error', in which case it will contain the image-specific error.
    """

    assert os.path.isdir(input_folder), '{} is not a folder'.format(input_folder)

    if output_folder is None:
        output_folder = input_folder
    else:
        os.makedirs(output_folder,exist_ok=True)

    assert pool_type in ('process','thread'), 'Illegal pool type {}'.format(pool_type)

    if image_files_relative is None:

        if verbose:
            print('Enumerating images')

        image_files_relative = find_images(input_folder,recursive=recursive,
                                           return_relative_paths=True,convert_slashes=True)
        if verbose:
            print('Found {} images'.format(len(image_files_relative)))

    # Nothing to resize, don't bother starting a worker pool
    if len(image_files_relative) == 0:
        return []

    # The pool constructors reject worker counts below 1, so anything <= 1 runs serially
    if n_workers <= 1:

        if verbose:
            print('Resizing images')

        results = []
        for fn_relative in tqdm(image_files_relative):
            results.append(_resize_relative_image(fn_relative,
                                                  input_folder=input_folder,
                                                  output_folder=output_folder,
                                                  target_width=target_width,
                                                  target_height=target_height,
                                                  no_enlarge_width=no_enlarge_width,
                                                  verbose=verbose,
                                                  quality=quality,
                                                  overwrite=overwrite))

    else:

        pool = None

        try:

            if pool_type == 'thread':
                pool = ThreadPool(n_workers)
                poolstring = 'threads'
            else:
                assert pool_type == 'process'
                pool = Pool(n_workers)
                poolstring = 'processes'

            if verbose:
                print('Starting resizing pool with {} {}'.format(n_workers,poolstring))

            p = partial(_resize_relative_image,
                        input_folder=input_folder,
                        output_folder=output_folder,
                        target_width=target_width,
                        target_height=target_height,
                        no_enlarge_width=no_enlarge_width,
                        verbose=verbose,
                        quality=quality,
                        overwrite=overwrite)

            results = list(tqdm(pool.imap(p, image_files_relative),
                                total=len(image_files_relative)))

        finally:

            # Always release the workers, even if resizing raised; otherwise every call
            # leaves a pool of idle workers behind.
            if pool is not None:
                pool.close()
                pool.join()

    return results

# ...def resize_image_folder(...)
|
|
1605
|
+
|
|
1606
|
+
|
|
1607
|
+
def get_image_size(im,verbose=False):
    """
    Look up the dimensions of an image, given either a filename or an image that is
    already in memory.  Never raises; any failure is reported as None.

    Args:
        im (str or PIL.Image): filename or PIL image
        verbose (bool, optional): enable additional debug output

    Returns:
        tuple (w,h), or None if the image fails to load or reports a non-positive
        width or height.
    """

    loaded_from_file = isinstance(im,str)

    # Only used for debug output
    image_name = im if loaded_from_file else '[in memory]'

    try:

        if loaded_from_file:
            im = load_image(im)

        size = (im.width,im.height)

        if size[0] > 0 and size[1] > 0:
            return size

        # Degenerate dimensions are treated like a load failure
        if verbose:
            print('Error reading width from image {}: {},{}'.format(
                image_name,size[0],size[1]))

    except Exception as e:

        if verbose:
            print('Error reading width from image {}: {}'.format(
                image_name,str(e)))

    return None

# ...def get_image_size(...)
|
|
1640
|
+
|
|
1641
|
+
|
|
1642
|
+
def parallel_get_image_sizes(filenames,
                             max_workers=16,
                             use_threads=True,
                             recursive=True,
                             verbose=False):
    """
    Retrieve image sizes for a list or folder of images

    Args:
        filenames (list or str): a list of image filenames or a folder.  A string that is
            not a folder is treated as a single filename.  Non-image files and
            unreadable images will be returned with a size of None.
        max_workers (int, optional): the number of parallel workers to use; set to <=1 to disable
            parallelization
        use_threads (bool, optional): whether to use threads (True) or processes (False) for
            parallelization
        recursive (bool, optional): if [filenames] is a folder, whether to search recursively
            for images.  Ignored if [filenames] is a list.
        verbose (bool, optional): enable additional debug output

    Returns:
        dict: a dict mapping filenames to (w,h) tuples; the value will be None for images that fail
        to load.  Keys are the filenames as supplied, or the paths returned by find_images() if
        [filenames] is a folder.
    """

    if isinstance(filenames,str):
        if os.path.isdir(filenames):
            if verbose:
                print('Enumerating images in {}'.format(filenames))
            filenames = find_images(filenames,recursive=recursive,return_relative_paths=False)
        else:
            # Without this, a bare filename would be iterated character by character
            filenames = [filenames]

    # Never start more workers than there are images (this is zero for an empty list, which
    # takes the serial path below)
    n_workers = min(max_workers,len(filenames))

    if verbose:
        print('Getting image sizes for {} images'.format(len(filenames)))

    if n_workers <= 1:

        results = []
        for filename in filenames:
            results.append(get_image_size(filename,verbose=verbose))

    else:

        if use_threads:
            pool = ThreadPool(n_workers)
        else:
            pool = Pool(n_workers)

        try:
            results = list(tqdm(pool.imap(
                partial(get_image_size,verbose=verbose),filenames), total=len(filenames)))
        finally:
            pool.close()
            pool.join()
            print('Pool closed and joined for image size retrieval')

    assert len(filenames) == len(results), 'Internal error in parallel_get_image_sizes'

    # Results are in the same order as [filenames]
    return dict(zip(filenames,results))
|
|
1704
|
+
|
|
1705
|
+
|
|
1706
|
+
#%% Image integrity checking functions
|
|
1707
|
+
|
|
1708
|
+
def check_image_integrity(filename,modes=None):
    """
    Check whether we can successfully load an image via OpenCV and/or PIL.

    Args:
        filename (str): the filename to evaluate
        modes (list, optional): a list containing one or more of:

            - 'cv'
            - 'pil'
            - 'skimage'
            - 'jpeg_trailer'

            'jpeg_trailer' checks that the binary data ends with ffd9.  It does not check whether
            the image is actually a jpeg, and even if it is, there are lots of reasons the image might not
            end with ffd9.  It's also true the JPEGs that cause "premature end of jpeg segment" issues
            don't end with ffd9, so this may be a useful diagnostic.  High precision, very low recall
            for corrupt jpegs.

            A single mode can also be passed as a string.  Set to None to use all modes.

    Returns:
        dict: a dict with a key called 'file' (the value of [filename]), one key for each string in
        [modes] (a success indicator for that mode, typically a string starting with either
        'success' or 'error'; other values are 'invalid jpeg trailer: ...' for the 'jpeg_trailer'
        mode, and a message asking you to install scikit-image if the 'skimage' mode is requested
        but skimage can't be imported).
    """

    all_modes = ('cv','pil','skimage','jpeg_trailer')

    if modes is None:
        modes = all_modes
    else:
        if isinstance(modes,str):
            modes = [modes]
        for mode in modes:
            assert mode in all_modes, 'Unrecognized mode {}'.format(mode)

    assert os.path.isfile(filename), 'Could not find file {}'.format(filename)

    result = {}
    result['file'] = filename

    for mode in modes:

        result[mode] = 'unknown'

        if mode == 'pil':

            try:
                pil_im = load_image(filename) # noqa
                assert pil_im is not None
                result[mode] = 'success'
            except Exception as e:
                result[mode] = 'error: {}'.format(str(e))

        elif mode == 'cv':

            try:
                cv_im = cv2.imread(filename)
                assert cv_im is not None, 'Unknown opencv read failure'
                numpy_im = np.asarray(cv_im) # noqa
                result[mode] = 'success'
            except Exception as e:
                result[mode] = 'error: {}'.format(str(e))

        elif mode == 'skimage':

            try:
                # This is not a standard dependency
                from skimage import io as skimage_io # type: ignore # noqa
            except Exception:
                result[mode] = 'could not import skimage, run pip install scikit-image'
                # Keep going, so the remaining modes still get an entry in [result]
                continue

            try:
                skimage_im = skimage_io.imread(filename) # noqa
                assert skimage_im is not None
                result[mode] = 'success'
            except Exception as e:
                result[mode] = 'error: {}'.format(str(e))

        elif mode == 'jpeg_trailer':

            # https://stackoverflow.com/a/48282863/16644970
            try:
                with open(filename, 'rb') as f:
                    # Read just the last two bytes (or fewer, for tiny files), rather than
                    # loading the whole file
                    f.seek(0, os.SEEK_END)
                    f.seek(max(f.tell() - 2, 0))
                    check_chars = f.read()
                if check_chars != b'\xff\xd9':
                    result[mode] = 'invalid jpeg trailer: {}'.format(str(check_chars))
                else:
                    result[mode] = 'success'
            except Exception as e:
                result[mode] = 'error: {}'.format(str(e))

    # ...for each mode

    return result

# ...def check_image_integrity(...)
|
|
1796
|
+
|
|
1797
|
+
|
|
1798
|
+
def parallel_check_image_integrity(filenames,
                                   modes=None,
                                   max_workers=16,
                                   use_threads=True,
                                   recursive=True,
                                   verbose=False):
    """
    Check whether we can successfully load a list of images via OpenCV and/or PIL.

    Args:
        filenames (list or str): a list of image filenames or a folder.  A string that is
            not a folder is treated as a single filename.
        modes (list, optional): see check_image_integrity() for documentation on the [modes] parameter
        max_workers (int, optional): the number of parallel workers to use; set to <=1 to disable
            parallelization
        use_threads (bool, optional): whether to use threads (True) or processes (False) for
            parallelization
        recursive (bool, optional): if [filenames] is a folder, whether to search recursively for images.
            Ignored if [filenames] is a list.
        verbose (bool, optional): enable additional debug output

    Returns:
        list: a list of dicts, one per file, in the same order as [filenames]; see
        check_image_integrity() for the format of each dict.
    """

    if isinstance(filenames,str):
        if os.path.isdir(filenames):
            if verbose:
                print('Enumerating images in {}'.format(filenames))
            filenames = find_images(filenames,recursive=recursive,return_relative_paths=False)
        else:
            # Without this, a bare filename would be iterated character by character
            filenames = [filenames]

    # Never start more workers than there are files (this is zero for an empty list, which
    # takes the serial path below)
    n_workers = min(max_workers,len(filenames))

    if verbose:
        print('Checking image integrity for {} filenames'.format(len(filenames)))

    if n_workers <= 1:

        results = []
        for filename in filenames:
            results.append(check_image_integrity(filename,modes=modes))

    else:

        if use_threads:
            pool = ThreadPool(n_workers)
        else:
            pool = Pool(n_workers)

        try:
            results = list(tqdm(pool.imap(
                partial(check_image_integrity,modes=modes),filenames), total=len(filenames)))
        finally:
            # Always release the workers, even if a check raised; otherwise every call
            # leaves a pool of idle workers behind.
            pool.close()
            pool.join()

    return results
|
|
1851
|
+
|
|
1852
|
+
|
|
1853
|
+
#%% Test drivers
|
|
1854
|
+
|
|
1855
|
+
if False:
|
|
1856
|
+
|
|
1857
|
+
#%% Text rendering tests
|
|
1858
|
+
|
|
1859
|
+
import os # noqa
|
|
1860
|
+
import numpy as np # noqa
|
|
1861
|
+
from megadetector.visualization.visualization_utils import \
|
|
1862
|
+
draw_bounding_boxes_on_image, exif_preserving_save, load_image, \
|
|
1863
|
+
TEXTALIGN_LEFT,TEXTALIGN_RIGHT,VTEXTALIGN_BOTTOM,VTEXTALIGN_TOP, \
|
|
1864
|
+
DEFAULT_LABEL_FONT_SIZE
|
|
1865
|
+
|
|
1866
|
+
fn = os.path.expanduser('~/AppData/Local/Temp/md-tests/md-test-images/ena24_7904.jpg')
|
|
1867
|
+
output_fn = r'g:\temp\test.jpg'
|
|
1868
|
+
|
|
1869
|
+
image = load_image(fn)
|
|
1870
|
+
|
|
1871
|
+
w = 0.2; h = 0.2
|
|
1872
|
+
all_boxes = [[0.05, 0.05, 0.25, 0.25],
|
|
1873
|
+
[0.05, 0.35, 0.25, 0.6],
|
|
1874
|
+
[0.35, 0.05, 0.6, 0.25],
|
|
1875
|
+
[0.35, 0.35, 0.6, 0.6]]
|
|
1876
|
+
|
|
1877
|
+
alignments = [
|
|
1878
|
+
[TEXTALIGN_LEFT,VTEXTALIGN_TOP],
|
|
1879
|
+
[TEXTALIGN_LEFT,VTEXTALIGN_BOTTOM],
|
|
1880
|
+
[TEXTALIGN_RIGHT,VTEXTALIGN_TOP],
|
|
1881
|
+
[TEXTALIGN_RIGHT,VTEXTALIGN_BOTTOM]
|
|
1882
|
+
]
|
|
1883
|
+
|
|
1884
|
+
labels = ['left_top','left_bottom','right_top','right_bottom']
|
|
1885
|
+
|
|
1886
|
+
text_rotation = -90
|
|
1887
|
+
n_label_copies = 2
|
|
1888
|
+
|
|
1889
|
+
for i_box,box in enumerate(all_boxes):
|
|
1890
|
+
|
|
1891
|
+
boxes = [box]
|
|
1892
|
+
boxes = np.array(boxes)
|
|
1893
|
+
classes = [i_box]
|
|
1894
|
+
display_strs = [[labels[i_box]]*n_label_copies]
|
|
1895
|
+
textalign = alignments[i_box][0]
|
|
1896
|
+
vtextalign = alignments[i_box][1]
|
|
1897
|
+
draw_bounding_boxes_on_image(image,
|
|
1898
|
+
boxes,
|
|
1899
|
+
classes,
|
|
1900
|
+
thickness=2,
|
|
1901
|
+
expansion=0,
|
|
1902
|
+
display_strs=display_strs,
|
|
1903
|
+
colormap=None,
|
|
1904
|
+
textalign=textalign,
|
|
1905
|
+
vtextalign=vtextalign,
|
|
1906
|
+
label_font_size=DEFAULT_LABEL_FONT_SIZE,
|
|
1907
|
+
text_rotation=text_rotation)
|
|
1908
|
+
|
|
1909
|
+
exif_preserving_save(image,output_fn)
|
|
1910
|
+
from megadetector.utils.path_utils import open_file
|
|
1911
|
+
open_file(output_fn)
|
|
1912
|
+
|
|
1913
|
+
|
|
1914
|
+
#%% Recursive resize test
|
|
1915
|
+
|
|
1916
|
+
from megadetector.visualization.visualization_utils import resize_image_folder # noqa
|
|
1917
|
+
|
|
1918
|
+
input_folder = r"C:\temp\resize-test\in"
|
|
1919
|
+
output_folder = r"C:\temp\resize-test\out"
|
|
1920
|
+
|
|
1921
|
+
resize_results = resize_image_folder(input_folder,output_folder,
|
|
1922
|
+
target_width=1280,verbose=True,quality=85,no_enlarge_width=True,
|
|
1923
|
+
pool_type='process',n_workers=10)
|
|
1924
|
+
|
|
1925
|
+
|
|
1926
|
+
#%% Integrity checking test
|
|
1927
|
+
|
|
1928
|
+
from megadetector.utils import md_tests
|
|
1929
|
+
options = md_tests.download_test_data()
|
|
1930
|
+
folder = options.scratch_dir
|
|
1931
|
+
|
|
1932
|
+
results = parallel_check_image_integrity(folder,max_workers=8)
|
|
1933
|
+
|
|
1934
|
+
modes = ['cv','pil','skimage','jpeg_trailer']
|
|
1935
|
+
|
|
1936
|
+
for r in results:
|
|
1937
|
+
for mode in modes:
|
|
1938
|
+
if r[mode] != 'success':
|
|
1939
|
+
s = r[mode]
|
|
1940
|
+
print('Mode {} failed for {}:\n{}\n'.format(mode,r['file'],s))
|