megadetector-10.0.13-py3-none-any.whl

This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of megadetector might be problematic.

Files changed (147)
  1. megadetector/__init__.py +0 -0
  2. megadetector/api/__init__.py +0 -0
  3. megadetector/api/batch_processing/integration/digiKam/setup.py +6 -0
  4. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +465 -0
  5. megadetector/api/batch_processing/integration/eMammal/test_scripts/config_template.py +5 -0
  6. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +125 -0
  7. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +55 -0
  8. megadetector/classification/__init__.py +0 -0
  9. megadetector/classification/aggregate_classifier_probs.py +108 -0
  10. megadetector/classification/analyze_failed_images.py +227 -0
  11. megadetector/classification/cache_batchapi_outputs.py +198 -0
  12. megadetector/classification/create_classification_dataset.py +626 -0
  13. megadetector/classification/crop_detections.py +516 -0
  14. megadetector/classification/csv_to_json.py +226 -0
  15. megadetector/classification/detect_and_crop.py +853 -0
  16. megadetector/classification/efficientnet/__init__.py +9 -0
  17. megadetector/classification/efficientnet/model.py +415 -0
  18. megadetector/classification/efficientnet/utils.py +608 -0
  19. megadetector/classification/evaluate_model.py +520 -0
  20. megadetector/classification/identify_mislabeled_candidates.py +152 -0
  21. megadetector/classification/json_to_azcopy_list.py +63 -0
  22. megadetector/classification/json_validator.py +696 -0
  23. megadetector/classification/map_classification_categories.py +276 -0
  24. megadetector/classification/merge_classification_detection_output.py +509 -0
  25. megadetector/classification/prepare_classification_script.py +194 -0
  26. megadetector/classification/prepare_classification_script_mc.py +228 -0
  27. megadetector/classification/run_classifier.py +287 -0
  28. megadetector/classification/save_mislabeled.py +110 -0
  29. megadetector/classification/train_classifier.py +827 -0
  30. megadetector/classification/train_classifier_tf.py +725 -0
  31. megadetector/classification/train_utils.py +323 -0
  32. megadetector/data_management/__init__.py +0 -0
  33. megadetector/data_management/animl_to_md.py +161 -0
  34. megadetector/data_management/annotations/__init__.py +0 -0
  35. megadetector/data_management/annotations/annotation_constants.py +33 -0
  36. megadetector/data_management/camtrap_dp_to_coco.py +270 -0
  37. megadetector/data_management/cct_json_utils.py +566 -0
  38. megadetector/data_management/cct_to_md.py +184 -0
  39. megadetector/data_management/cct_to_wi.py +293 -0
  40. megadetector/data_management/coco_to_labelme.py +284 -0
  41. megadetector/data_management/coco_to_yolo.py +702 -0
  42. megadetector/data_management/databases/__init__.py +0 -0
  43. megadetector/data_management/databases/add_width_and_height_to_db.py +107 -0
  44. megadetector/data_management/databases/combine_coco_camera_traps_files.py +210 -0
  45. megadetector/data_management/databases/integrity_check_json_db.py +528 -0
  46. megadetector/data_management/databases/subset_json_db.py +195 -0
  47. megadetector/data_management/generate_crops_from_cct.py +200 -0
  48. megadetector/data_management/get_image_sizes.py +164 -0
  49. megadetector/data_management/labelme_to_coco.py +559 -0
  50. megadetector/data_management/labelme_to_yolo.py +349 -0
  51. megadetector/data_management/lila/__init__.py +0 -0
  52. megadetector/data_management/lila/create_lila_blank_set.py +556 -0
  53. megadetector/data_management/lila/create_lila_test_set.py +187 -0
  54. megadetector/data_management/lila/create_links_to_md_results_files.py +106 -0
  55. megadetector/data_management/lila/download_lila_subset.py +182 -0
  56. megadetector/data_management/lila/generate_lila_per_image_labels.py +777 -0
  57. megadetector/data_management/lila/get_lila_annotation_counts.py +174 -0
  58. megadetector/data_management/lila/get_lila_image_counts.py +112 -0
  59. megadetector/data_management/lila/lila_common.py +319 -0
  60. megadetector/data_management/lila/test_lila_metadata_urls.py +164 -0
  61. megadetector/data_management/mewc_to_md.py +344 -0
  62. megadetector/data_management/ocr_tools.py +873 -0
  63. megadetector/data_management/read_exif.py +964 -0
  64. megadetector/data_management/remap_coco_categories.py +195 -0
  65. megadetector/data_management/remove_exif.py +156 -0
  66. megadetector/data_management/rename_images.py +194 -0
  67. megadetector/data_management/resize_coco_dataset.py +663 -0
  68. megadetector/data_management/speciesnet_to_md.py +41 -0
  69. megadetector/data_management/wi_download_csv_to_coco.py +247 -0
  70. megadetector/data_management/yolo_output_to_md_output.py +594 -0
  71. megadetector/data_management/yolo_to_coco.py +876 -0
  72. megadetector/data_management/zamba_to_md.py +188 -0
  73. megadetector/detection/__init__.py +0 -0
  74. megadetector/detection/change_detection.py +840 -0
  75. megadetector/detection/process_video.py +479 -0
  76. megadetector/detection/pytorch_detector.py +1451 -0
  77. megadetector/detection/run_detector.py +1267 -0
  78. megadetector/detection/run_detector_batch.py +2159 -0
  79. megadetector/detection/run_inference_with_yolov5_val.py +1314 -0
  80. megadetector/detection/run_md_and_speciesnet.py +1494 -0
  81. megadetector/detection/run_tiled_inference.py +1038 -0
  82. megadetector/detection/tf_detector.py +209 -0
  83. megadetector/detection/video_utils.py +1379 -0
  84. megadetector/postprocessing/__init__.py +0 -0
  85. megadetector/postprocessing/add_max_conf.py +72 -0
  86. megadetector/postprocessing/categorize_detections_by_size.py +166 -0
  87. megadetector/postprocessing/classification_postprocessing.py +1752 -0
  88. megadetector/postprocessing/combine_batch_outputs.py +249 -0
  89. megadetector/postprocessing/compare_batch_results.py +2110 -0
  90. megadetector/postprocessing/convert_output_format.py +403 -0
  91. megadetector/postprocessing/create_crop_folder.py +629 -0
  92. megadetector/postprocessing/detector_calibration.py +570 -0
  93. megadetector/postprocessing/generate_csv_report.py +522 -0
  94. megadetector/postprocessing/load_api_results.py +223 -0
  95. megadetector/postprocessing/md_to_coco.py +428 -0
  96. megadetector/postprocessing/md_to_labelme.py +351 -0
  97. megadetector/postprocessing/md_to_wi.py +41 -0
  98. megadetector/postprocessing/merge_detections.py +392 -0
  99. megadetector/postprocessing/postprocess_batch_results.py +2077 -0
  100. megadetector/postprocessing/remap_detection_categories.py +226 -0
  101. megadetector/postprocessing/render_detection_confusion_matrix.py +677 -0
  102. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +206 -0
  103. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +82 -0
  104. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +1665 -0
  105. megadetector/postprocessing/separate_detections_into_folders.py +795 -0
  106. megadetector/postprocessing/subset_json_detector_output.py +964 -0
  107. megadetector/postprocessing/top_folders_to_bottom.py +238 -0
  108. megadetector/postprocessing/validate_batch_results.py +332 -0
  109. megadetector/taxonomy_mapping/__init__.py +0 -0
  110. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +491 -0
  111. megadetector/taxonomy_mapping/map_new_lila_datasets.py +213 -0
  112. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +165 -0
  113. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +543 -0
  114. megadetector/taxonomy_mapping/retrieve_sample_image.py +71 -0
  115. megadetector/taxonomy_mapping/simple_image_download.py +224 -0
  116. megadetector/taxonomy_mapping/species_lookup.py +1008 -0
  117. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +159 -0
  118. megadetector/taxonomy_mapping/taxonomy_graph.py +346 -0
  119. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +83 -0
  120. megadetector/tests/__init__.py +0 -0
  121. megadetector/tests/test_nms_synthetic.py +335 -0
  122. megadetector/utils/__init__.py +0 -0
  123. megadetector/utils/ct_utils.py +1857 -0
  124. megadetector/utils/directory_listing.py +199 -0
  125. megadetector/utils/extract_frames_from_video.py +307 -0
  126. megadetector/utils/gpu_test.py +125 -0
  127. megadetector/utils/md_tests.py +2072 -0
  128. megadetector/utils/path_utils.py +2832 -0
  129. megadetector/utils/process_utils.py +172 -0
  130. megadetector/utils/split_locations_into_train_val.py +237 -0
  131. megadetector/utils/string_utils.py +234 -0
  132. megadetector/utils/url_utils.py +825 -0
  133. megadetector/utils/wi_platform_utils.py +968 -0
  134. megadetector/utils/wi_taxonomy_utils.py +1759 -0
  135. megadetector/utils/write_html_image_list.py +239 -0
  136. megadetector/visualization/__init__.py +0 -0
  137. megadetector/visualization/plot_utils.py +309 -0
  138. megadetector/visualization/render_images_with_thumbnails.py +243 -0
  139. megadetector/visualization/visualization_utils.py +1940 -0
  140. megadetector/visualization/visualize_db.py +630 -0
  141. megadetector/visualization/visualize_detector_output.py +479 -0
  142. megadetector/visualization/visualize_video_output.py +705 -0
  143. megadetector-10.0.13.dist-info/METADATA +134 -0
  144. megadetector-10.0.13.dist-info/RECORD +147 -0
  145. megadetector-10.0.13.dist-info/WHEEL +5 -0
  146. megadetector-10.0.13.dist-info/licenses/LICENSE +19 -0
  147. megadetector-10.0.13.dist-info/top_level.txt +1 -0

megadetector/data_management/databases/subset_json_db.py
@@ -0,0 +1,195 @@
+ """
+
+ subset_json_db.py
+
+ Select a subset of images (and associated annotations) from a .json file in COCO
+ Camera Traps format based on a string query.
+
+ To subset .json files in the MegaDetector output format, see
+ subset_json_detector_output.py.
+
+ """
+
+ #%% Constants and imports
+
+ import sys
+ import json
+ import argparse
+
+ from tqdm import tqdm
+ from copy import copy
+
+ from megadetector.utils import ct_utils
+ from megadetector.utils.ct_utils import sort_list_of_dicts_by_key
+
+
+ #%% Functions
+
+ def subset_json_db(input_json,
+                    query,
+                    output_json=None,
+                    ignore_case=False,
+                    remap_categories=True,
+                    verbose=False):
+     """
+     Given a json file (or dictionary already loaded from a json file), produce a new
+     database containing only the images whose filenames contain the string 'query',
+     optionally writing that DB output to a new json file.
+
+     Args:
+         input_json (str): COCO Camera Traps .json file to load, or an already-loaded dict
+         query (str or list): string to query for; only images whose filenames contain
+             this string are included in the output. If this is a list, test for exact matches.
+         output_json (str, optional): file to write the resulting .json file to
+         ignore_case (bool, optional): whether to perform a case-insensitive search for [query]
+         remap_categories (bool, optional): trim the category list to only the categories used
+             in the subset
+         verbose (bool, optional): enable additional debug output
+
+     Returns:
+         dict: CCT dictionary containing a subset of the images and annotations in the input dict
+     """
+
+     # Load the input file if necessary
+     if isinstance(input_json,str):
+         print('Loading input .json...')
+         with open(input_json, 'r') as f:
+             input_data = json.load(f)
+     else:
+         input_data = input_json
+
+     # Find images matching the query
+     images = []
+
+     if isinstance(query,str):
+
+         if ignore_case:
+             query = query.lower()
+
+         for im in tqdm(input_data['images']):
+             fn = im['file_name']
+             if ignore_case:
+                 fn = fn.lower()
+             if query in fn:
+                 images.append(im)
+
+     else:
+
+         query = set(query)
+
+         if ignore_case:
+             query = set([s.lower() for s in query])
+
+         for im in input_data['images']:
+             fn = im['file_name']
+             if ignore_case:
+                 fn = fn.lower()
+             if fn in query:
+                 images.append(im)
+
+     image_ids = set([im['id'] for im in images])
+
+     # Find annotations referring to those images
+     annotations = []
+
+     for ann in input_data['annotations']:
+         if ann['image_id'] in image_ids:
+             annotations.append(ann)
+
+     output_data = copy(input_data)
+     output_data['images'] = images
+     output_data['annotations'] = annotations
+
+     # Remap categories if necessary
+     if remap_categories:
+
+         category_ids_used = set()
+         for ann in annotations:
+             category_ids_used.add(ann['category_id'])
+
+         if verbose:
+             print('Keeping {} of {} categories'.format(
+                 len(category_ids_used),len(input_data['categories'])))
+
+         input_category_id_to_output_category_id = {}
+
+         next_category_id = 0
+
+         # Build mappings from old to new category IDs
+         for input_category_id in category_ids_used:
+             assert isinstance(input_category_id,int), \
+                 'Illegal category ID {}'.format(input_category_id)
+             output_category_id = next_category_id
+             next_category_id = next_category_id + 1
+             input_category_id_to_output_category_id[input_category_id] = output_category_id
+
+         # Modify the annotations
+         for ann in annotations:
+             assert ann['category_id'] in input_category_id_to_output_category_id
+             ann['category_id'] = input_category_id_to_output_category_id[ann['category_id']]
+
+         output_categories = []
+
+         # Re-write the category table
+         for cat in input_data['categories']:
+
+             if cat['id'] in input_category_id_to_output_category_id:
+
+                 # There may be non-required fields, so don't just create an empty dict
+                 # and copy the name/id fields; keep the original dict other than "id"
+                 output_category = copy(cat)
+                 output_category['id'] = input_category_id_to_output_category_id[cat['id']]
+                 output_categories.append(output_category)
+
+         output_categories = sort_list_of_dicts_by_key(output_categories,'id')
+         output_data['categories'] = output_categories
+
+     # ...if we need to remap categories
+
+     # Write the output file if requested
+     if output_json is not None:
+         if verbose:
+             print('Writing output .json to {}'.format(output_json))
+         ct_utils.write_json(output_json, output_data)
+
+     if verbose:
+         print('Keeping {} of {} images, {} of {} annotations'.format(
+             len(output_data['images']),len(input_data['images']),
+             len(output_data['annotations']),len(input_data['annotations'])))
+
+     return output_data
+
+
+ #%% Interactive driver
+
+ if False:
+
+     #%%
+
+     input_json = r"e:\Statewide_wolf_container\idfg_20190409.json"
+     output_json = r"e:\Statewide_wolf_container\idfg_20190409_clearcreek.json"
+     query = 'clearcreek'
+     ignore_case = True
+     db = subset_json_db(input_json, query, output_json, ignore_case)
+
+
+ #%% Command-line driver
+
+ def main(): # noqa
+
+     parser = argparse.ArgumentParser()
+     parser.add_argument('input_json', type=str, help='Input file (a COCO Camera Traps .json file)')
+     parser.add_argument('output_json', type=str, help='Output file')
+     parser.add_argument('query', type=str, help='Filename query')
+     parser.add_argument('--ignore_case', action='store_true')
+
+     if len(sys.argv[1:]) == 0:
+         parser.print_help()
+         parser.exit()
+
+     args = parser.parse_args()
+
+     subset_json_db(args.input_json,args.query,args.output_json,args.ignore_case)
+
+ if __name__ == '__main__':
+     main()
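
For reference, a minimal usage sketch for the subset_json_db() function shown above (not part of the package diff); the file names and query string here are hypothetical.

    from megadetector.data_management.databases.subset_json_db import subset_json_db

    # Keep only the images whose filenames contain 'site_a' (case-insensitive),
    # trim the category list to the categories that remain, and write the result
    # to a new COCO Camera Traps .json file.
    subset = subset_json_db(input_json='all_cameras.json',
                            query='site_a',
                            output_json='site_a_only.json',
                            ignore_case=True,
                            remap_categories=True,
                            verbose=True)
    print('Kept {} images and {} annotations'.format(
        len(subset['images']), len(subset['annotations'])))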

megadetector/data_management/generate_crops_from_cct.py
@@ -0,0 +1,200 @@
+ """
+
+ generate_crops_from_cct.py
+
+ Given a .json file in COCO Camera Traps format, creates a cropped image for
+ each bounding box.
+
+ """
+
+ #%% Imports and constants
+
+ import os
+ import argparse
+ import json
+
+ from tqdm import tqdm
+ from PIL import Image
+
+
+ #%% Functions
+
+ def generate_crops_from_cct(cct_file,image_dir,output_dir,padding=0,flat_output=True):
+     """
+     Given a .json file in COCO Camera Traps format, creates a cropped image for
+     each bounding box.
+
+     Args:
+         cct_file (str): the COCO .json file from which we should load data
+         image_dir (str): the folder where the images live; filenames in the .json
+             file should be relative to this folder
+         output_dir (str): the folder where we should write cropped images
+         padding (float, optional): number of pixels by which we should expand each box
+             before cropping
+         flat_output (bool, optional): if False, folder structure will be preserved
+             in the output, e.g. the image a/b/c/d.jpg will result in image files
+             in the output folder called, e.g., a/b/c/d_crop000_id_12345.jpg. If
+             [flat_output] is True, the corresponding output image will be
+             a_b_c_d_crop000_id_12345.jpg.
+     """
+
+     ## Read and validate input
+
+     assert os.path.isfile(cct_file)
+     assert os.path.isdir(image_dir)
+     os.makedirs(output_dir,exist_ok=True)
+
+     with open(cct_file,'r') as f:
+         d = json.load(f)
+
+
+     ## Find annotations for each image
+
+     from collections import defaultdict
+
+     # This actually maps image IDs to annotations, but only to annotations
+     # containing boxes
+     image_id_to_boxes = defaultdict(list)
+
+     n_boxes = 0
+
+     for ann in d['annotations']:
+         if 'bbox' in ann:
+             image_id_to_boxes[ann['image_id']].append(ann)
+             n_boxes += 1
+
+     print('Found {} boxes in {} annotations for {} images'.format(
+         n_boxes,len(d['annotations']),len(d['images'])))
+
+
+     ## Generate crops
+
+     # im = d['images'][0]
+     for im in tqdm(d['images']):
+
+         input_image_fn = os.path.join(image_dir,im['file_name'])
+         assert os.path.isfile(input_image_fn), 'Could not find image {}'.format(input_image_fn)
+
+         if im['id'] not in image_id_to_boxes:
+             continue
+
+         annotations_this_image = image_id_to_boxes[im['id']]
+
+         # Load the image
+         img = Image.open(input_image_fn)
+
+         # Generate crops
+         # i_ann = 0; ann = annotations_this_image[i_ann]
+         for i_ann,ann in enumerate(annotations_this_image):
+
+             # x/y/w/h, origin at the upper-left
+             bbox = ann['bbox']
+
+             xmin = bbox[0]
+             ymin = bbox[1]
+             xmax = xmin + bbox[2]
+             ymax = ymin + bbox[3]
+
+             xmin -= padding / 2
+             ymin -= padding / 2
+             xmax += padding / 2
+             ymax += padding / 2
+
+             xmin = max(xmin,0)
+             ymin = max(ymin,0)
+             # PIL's crop() method uses exclusive upper bounds for the right and lower
+             # edges, hence "img.width" rather than "img.width-1" here.
+             xmax = min(xmax,img.width)
+             ymax = min(ymax,img.height)
+
+             crop = img.crop(box=[xmin, ymin, xmax, ymax])
+
+             output_fn = os.path.splitext(im['file_name'])[0].replace('\\','/')
+             if flat_output:
+                 output_fn = output_fn.replace('/','_')
+             output_fn = output_fn + '_crop' + str(i_ann).zfill(3) + '_id_' + str(ann['id'])
+             output_fn = output_fn + '.jpg'
+
+             output_full_path = os.path.join(output_dir,output_fn)
+
+             if not flat_output:
+                 os.makedirs(os.path.dirname(output_full_path),exist_ok=True)
+
+             crop.save(output_full_path)
+
+         # ...for each box
+
+     # ...for each image
+
+ # ...generate_crops_from_cct()
+
+
+ #%% Interactive driver
+
+ if False:
+
+     pass
+
+     #%%
+
+     cct_file = os.path.expanduser('~/data/noaa/noaa_estuary_fish.json')
+     image_dir = os.path.expanduser('~/data/noaa/JPEGImages')
+     padding = 50
+     flat_output = True
+     output_dir = '/home/user/tmp/noaa-fish-crops'
+
+     generate_crops_from_cct(cct_file,image_dir,output_dir,padding,flat_output=True)
+     files = os.listdir(output_dir)
+
+
+ #%% Command-line driver
+
+ def main():
+     """
+     Command-line interface to generate crops from a COCO Camera Traps .json file.
+     """
+
+     parser = argparse.ArgumentParser(
+         description='Generate cropped images from a COCO Camera Traps .json file'
+     )
+     parser.add_argument(
+         'cct_file',
+         type=str,
+         help='COCO .json file to load data from'
+     )
+     parser.add_argument(
+         'image_dir',
+         type=str,
+         help='Folder where images are located'
+     )
+     parser.add_argument(
+         'output_dir',
+         type=str,
+         help='Folder to which we should write cropped images'
+     )
+     parser.add_argument(
+         '--padding',
+         type=int,
+         default=0,
+         help='Pixels by which to expand each box before cropping'
+     )
+     parser.add_argument(
+         '--flat_output',
+         action='store_true',
+         help='Flatten folder structure in output (folder structure is preserved by default)'
+     )
+
+     args = parser.parse_args()
+
+     generate_crops_from_cct(
+         cct_file=args.cct_file,
+         image_dir=args.image_dir,
+         output_dir=args.output_dir,
+         padding=args.padding,
+         flat_output=args.flat_output
+     )
+
+     print(f'Generated crops in {args.output_dir}')
+
+ if __name__ == '__main__':
+     main()
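
For reference, a minimal usage sketch for generate_crops_from_cct() as defined above (not part of the package diff); the paths here are hypothetical.

    from megadetector.data_management.generate_crops_from_cct import generate_crops_from_cct

    # Crop every bounding box in the CCT file, expanding each box by 20 pixels
    # in total (10 pixels per side), and flatten the output folder structure, so
    # 'a/b/c.jpg' produces crops named like 'a_b_c_crop000_id_<annotation id>.jpg'.
    generate_crops_from_cct(cct_file='annotations.json',
                            image_dir='images',
                            output_dir='crops',
                            padding=20,
                            flat_output=True)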

megadetector/data_management/get_image_sizes.py
@@ -0,0 +1,164 @@
+ """
+
+ get_image_sizes.py
+
+ Given a json-formatted list of image filenames, retrieves the width and height of
+ every image, optionally writing the results to a new .json file.
+
+ """
+
+ #%% Constants and imports
+
+ import argparse
+ import json
+ import os
+ import sys
+
+ from PIL import Image
+
+ from multiprocessing.pool import ThreadPool
+ from multiprocessing.pool import Pool
+ from functools import partial
+ from tqdm import tqdm
+
+ from megadetector.utils.path_utils import find_images
+
+ image_base = ''
+ default_n_threads = 1
+ use_threads = False
+
+
+ #%% Processing functions
+
+ def _get_image_size(image_path,image_prefix=None):
+     """
+     Support function to get the size of a single image. Returns a (path,w,h) tuple.
+     w and h will be -1 if the image fails to load.
+     """
+
+     if image_prefix is not None:
+         full_path = os.path.join(image_prefix,image_path)
+     else:
+         full_path = image_path
+
+     # Is this image on disk?
+     if not os.path.isfile(full_path):
+         print('Could not find image {}'.format(full_path))
+         return (image_path,-1,-1)
+
+     try:
+         pil_im = Image.open(full_path)
+         w = pil_im.width
+         h = pil_im.height
+         return (image_path,w,h)
+     except Exception as e:
+         print('Error reading image {}: {}'.format(full_path,str(e)))
+         return (image_path,-1,-1)
+
+
+ def get_image_sizes(filenames,image_prefix=None,output_file=None,
+                     n_workers=default_n_threads,use_threads=True,
+                     recursive=True):
+     """
+     Gets the width and height of all images in [filenames], which can be:
+
+     * A .json-formatted file containing a list of strings
+     * A folder
+     * A list of files
+
+     ...returning a list of (path,w,h) tuples, and optionally writing the results to [output_file].
+
+     Args:
+         filenames (str or list): the image filenames for which we should retrieve sizes;
+             can be the name of a .json-formatted file containing a list of strings, a folder
+             in which we should enumerate images, or a list of files
+         image_prefix (str, optional): optional prefix to add to images to get to full paths;
+             useful when [filenames] contains relative paths, in which case [image_prefix] is the
+             base folder for the source images
+         output_file (str, optional): a .json file to which we should write the image sizes
+         n_workers (int, optional): number of parallel workers to use, set to <=1 to
+             disable parallelization
+         use_threads (bool, optional): whether to use threads (True) or processes (False)
+             for parallelization; not relevant if [n_workers] <= 1
+         recursive (bool, optional): only relevant if [filenames] is actually a folder,
+             determines whether image enumeration within that folder will be recursive
+
+     Returns:
+         list: list of (path,w,h) tuples
+     """
+
+     if output_file is not None:
+         output_dir = os.path.dirname(output_file)
+         if len(output_dir) > 0:
+             assert os.path.isdir(output_dir), \
+                 'Illegal output file {}, parent folder does not exist'.format(output_file)
+
+     if isinstance(filenames,str) and os.path.isfile(filenames):
+         with open(filenames,'r') as f:
+             filenames = json.load(f)
+         filenames = [s.strip() for s in filenames]
+     elif isinstance(filenames,str) and os.path.isdir(filenames):
+         filenames = find_images(filenames,recursive=recursive,
+                                 return_relative_paths=False,convert_slashes=True)
+     else:
+         assert isinstance(filenames,list)
+
+     if n_workers <= 1:
+
+         all_results = []
+         for i_file,fn in tqdm(enumerate(filenames),total=len(filenames)):
+             all_results.append(_get_image_size(fn,image_prefix=image_prefix))
+
+     else:
+
+         print('Creating a pool with {} workers'.format(n_workers))
+         if use_threads:
+             pool = ThreadPool(n_workers)
+         else:
+             pool = Pool(n_workers)
+         # all_results = list(tqdm(pool.imap(process_image, filenames), total=len(filenames)))
+         try:
+             all_results = list(tqdm(pool.imap(
+                 partial(_get_image_size,image_prefix=image_prefix), filenames), total=len(filenames)))
+         finally:
+             pool.close()
+             pool.join()
+             print('Pool closed and joined for image size reads')
+
+     if output_file is not None:
+         with open(output_file,'w') as f:
+             json.dump(all_results,f,indent=1)
+
+     return all_results
+
+
+ #%% Command-line driver
+
+ def main(): # noqa
+
+     parser = argparse.ArgumentParser()
+     parser.add_argument('filenames',type=str,
+         help='Folder from which we should fetch image sizes, or .json file with a list of filenames')
+     parser.add_argument('output_file',type=str,
+         help='Output file (.json) to which we should write image size information')
+     parser.add_argument('--image_prefix', type=str, default=None,
+         help='Prefix to prepend to image filenames, only relevant if [filenames] points to a ' + \
+              'list of relative paths')
+     parser.add_argument('--n_threads', type=int, default=default_n_threads,
+         help='Number of concurrent workers, set to <=1 to disable parallelization (default {})'.format(
+             default_n_threads))
+
+     if len(sys.argv[1:])==0:
+         parser.print_help()
+         parser.exit()
+
+     args = parser.parse_args()
+
+     _ = get_image_sizes(filenames=args.filenames,
+                         output_file=args.output_file,
+                         image_prefix=args.image_prefix,
+                         n_workers=args.n_threads)
+
+ if __name__ == '__main__':
+
+     main()
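
For reference, a minimal usage sketch for get_image_sizes() as defined above (not part of the package diff); the folder and output file names are hypothetical.

    from megadetector.data_management.get_image_sizes import get_image_sizes

    # Recursively enumerate images under a folder and read their sizes with four
    # worker threads; each result is a (path, width, height) tuple, with width
    # and height set to -1 for images that fail to load.
    sizes = get_image_sizes('camera_trap_images',
                            output_file='image_sizes.json',
                            n_workers=4,
                            use_threads=True,
                            recursive=True)
    print('Read sizes for {} images'.format(len(sizes)))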