megadetector 10.0.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of megadetector might be problematic.

Files changed (147)
  1. megadetector/__init__.py +0 -0
  2. megadetector/api/__init__.py +0 -0
  3. megadetector/api/batch_processing/integration/digiKam/setup.py +6 -0
  4. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +465 -0
  5. megadetector/api/batch_processing/integration/eMammal/test_scripts/config_template.py +5 -0
  6. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +125 -0
  7. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +55 -0
  8. megadetector/classification/__init__.py +0 -0
  9. megadetector/classification/aggregate_classifier_probs.py +108 -0
  10. megadetector/classification/analyze_failed_images.py +227 -0
  11. megadetector/classification/cache_batchapi_outputs.py +198 -0
  12. megadetector/classification/create_classification_dataset.py +626 -0
  13. megadetector/classification/crop_detections.py +516 -0
  14. megadetector/classification/csv_to_json.py +226 -0
  15. megadetector/classification/detect_and_crop.py +853 -0
  16. megadetector/classification/efficientnet/__init__.py +9 -0
  17. megadetector/classification/efficientnet/model.py +415 -0
  18. megadetector/classification/efficientnet/utils.py +608 -0
  19. megadetector/classification/evaluate_model.py +520 -0
  20. megadetector/classification/identify_mislabeled_candidates.py +152 -0
  21. megadetector/classification/json_to_azcopy_list.py +63 -0
  22. megadetector/classification/json_validator.py +696 -0
  23. megadetector/classification/map_classification_categories.py +276 -0
  24. megadetector/classification/merge_classification_detection_output.py +509 -0
  25. megadetector/classification/prepare_classification_script.py +194 -0
  26. megadetector/classification/prepare_classification_script_mc.py +228 -0
  27. megadetector/classification/run_classifier.py +287 -0
  28. megadetector/classification/save_mislabeled.py +110 -0
  29. megadetector/classification/train_classifier.py +827 -0
  30. megadetector/classification/train_classifier_tf.py +725 -0
  31. megadetector/classification/train_utils.py +323 -0
  32. megadetector/data_management/__init__.py +0 -0
  33. megadetector/data_management/animl_to_md.py +161 -0
  34. megadetector/data_management/annotations/__init__.py +0 -0
  35. megadetector/data_management/annotations/annotation_constants.py +33 -0
  36. megadetector/data_management/camtrap_dp_to_coco.py +270 -0
  37. megadetector/data_management/cct_json_utils.py +566 -0
  38. megadetector/data_management/cct_to_md.py +184 -0
  39. megadetector/data_management/cct_to_wi.py +293 -0
  40. megadetector/data_management/coco_to_labelme.py +284 -0
  41. megadetector/data_management/coco_to_yolo.py +702 -0
  42. megadetector/data_management/databases/__init__.py +0 -0
  43. megadetector/data_management/databases/add_width_and_height_to_db.py +107 -0
  44. megadetector/data_management/databases/combine_coco_camera_traps_files.py +210 -0
  45. megadetector/data_management/databases/integrity_check_json_db.py +528 -0
  46. megadetector/data_management/databases/subset_json_db.py +195 -0
  47. megadetector/data_management/generate_crops_from_cct.py +200 -0
  48. megadetector/data_management/get_image_sizes.py +164 -0
  49. megadetector/data_management/labelme_to_coco.py +559 -0
  50. megadetector/data_management/labelme_to_yolo.py +349 -0
  51. megadetector/data_management/lila/__init__.py +0 -0
  52. megadetector/data_management/lila/create_lila_blank_set.py +556 -0
  53. megadetector/data_management/lila/create_lila_test_set.py +187 -0
  54. megadetector/data_management/lila/create_links_to_md_results_files.py +106 -0
  55. megadetector/data_management/lila/download_lila_subset.py +182 -0
  56. megadetector/data_management/lila/generate_lila_per_image_labels.py +777 -0
  57. megadetector/data_management/lila/get_lila_annotation_counts.py +174 -0
  58. megadetector/data_management/lila/get_lila_image_counts.py +112 -0
  59. megadetector/data_management/lila/lila_common.py +319 -0
  60. megadetector/data_management/lila/test_lila_metadata_urls.py +164 -0
  61. megadetector/data_management/mewc_to_md.py +344 -0
  62. megadetector/data_management/ocr_tools.py +873 -0
  63. megadetector/data_management/read_exif.py +964 -0
  64. megadetector/data_management/remap_coco_categories.py +195 -0
  65. megadetector/data_management/remove_exif.py +156 -0
  66. megadetector/data_management/rename_images.py +194 -0
  67. megadetector/data_management/resize_coco_dataset.py +663 -0
  68. megadetector/data_management/speciesnet_to_md.py +41 -0
  69. megadetector/data_management/wi_download_csv_to_coco.py +247 -0
  70. megadetector/data_management/yolo_output_to_md_output.py +594 -0
  71. megadetector/data_management/yolo_to_coco.py +876 -0
  72. megadetector/data_management/zamba_to_md.py +188 -0
  73. megadetector/detection/__init__.py +0 -0
  74. megadetector/detection/change_detection.py +840 -0
  75. megadetector/detection/process_video.py +479 -0
  76. megadetector/detection/pytorch_detector.py +1451 -0
  77. megadetector/detection/run_detector.py +1267 -0
  78. megadetector/detection/run_detector_batch.py +2159 -0
  79. megadetector/detection/run_inference_with_yolov5_val.py +1314 -0
  80. megadetector/detection/run_md_and_speciesnet.py +1494 -0
  81. megadetector/detection/run_tiled_inference.py +1038 -0
  82. megadetector/detection/tf_detector.py +209 -0
  83. megadetector/detection/video_utils.py +1379 -0
  84. megadetector/postprocessing/__init__.py +0 -0
  85. megadetector/postprocessing/add_max_conf.py +72 -0
  86. megadetector/postprocessing/categorize_detections_by_size.py +166 -0
  87. megadetector/postprocessing/classification_postprocessing.py +1752 -0
  88. megadetector/postprocessing/combine_batch_outputs.py +249 -0
  89. megadetector/postprocessing/compare_batch_results.py +2110 -0
  90. megadetector/postprocessing/convert_output_format.py +403 -0
  91. megadetector/postprocessing/create_crop_folder.py +629 -0
  92. megadetector/postprocessing/detector_calibration.py +570 -0
  93. megadetector/postprocessing/generate_csv_report.py +522 -0
  94. megadetector/postprocessing/load_api_results.py +223 -0
  95. megadetector/postprocessing/md_to_coco.py +428 -0
  96. megadetector/postprocessing/md_to_labelme.py +351 -0
  97. megadetector/postprocessing/md_to_wi.py +41 -0
  98. megadetector/postprocessing/merge_detections.py +392 -0
  99. megadetector/postprocessing/postprocess_batch_results.py +2077 -0
  100. megadetector/postprocessing/remap_detection_categories.py +226 -0
  101. megadetector/postprocessing/render_detection_confusion_matrix.py +677 -0
  102. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +206 -0
  103. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +82 -0
  104. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +1665 -0
  105. megadetector/postprocessing/separate_detections_into_folders.py +795 -0
  106. megadetector/postprocessing/subset_json_detector_output.py +964 -0
  107. megadetector/postprocessing/top_folders_to_bottom.py +238 -0
  108. megadetector/postprocessing/validate_batch_results.py +332 -0
  109. megadetector/taxonomy_mapping/__init__.py +0 -0
  110. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +491 -0
  111. megadetector/taxonomy_mapping/map_new_lila_datasets.py +213 -0
  112. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +165 -0
  113. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +543 -0
  114. megadetector/taxonomy_mapping/retrieve_sample_image.py +71 -0
  115. megadetector/taxonomy_mapping/simple_image_download.py +224 -0
  116. megadetector/taxonomy_mapping/species_lookup.py +1008 -0
  117. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +159 -0
  118. megadetector/taxonomy_mapping/taxonomy_graph.py +346 -0
  119. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +83 -0
  120. megadetector/tests/__init__.py +0 -0
  121. megadetector/tests/test_nms_synthetic.py +335 -0
  122. megadetector/utils/__init__.py +0 -0
  123. megadetector/utils/ct_utils.py +1857 -0
  124. megadetector/utils/directory_listing.py +199 -0
  125. megadetector/utils/extract_frames_from_video.py +307 -0
  126. megadetector/utils/gpu_test.py +125 -0
  127. megadetector/utils/md_tests.py +2072 -0
  128. megadetector/utils/path_utils.py +2832 -0
  129. megadetector/utils/process_utils.py +172 -0
  130. megadetector/utils/split_locations_into_train_val.py +237 -0
  131. megadetector/utils/string_utils.py +234 -0
  132. megadetector/utils/url_utils.py +825 -0
  133. megadetector/utils/wi_platform_utils.py +968 -0
  134. megadetector/utils/wi_taxonomy_utils.py +1759 -0
  135. megadetector/utils/write_html_image_list.py +239 -0
  136. megadetector/visualization/__init__.py +0 -0
  137. megadetector/visualization/plot_utils.py +309 -0
  138. megadetector/visualization/render_images_with_thumbnails.py +243 -0
  139. megadetector/visualization/visualization_utils.py +1940 -0
  140. megadetector/visualization/visualize_db.py +630 -0
  141. megadetector/visualization/visualize_detector_output.py +479 -0
  142. megadetector/visualization/visualize_video_output.py +705 -0
  143. megadetector-10.0.13.dist-info/METADATA +134 -0
  144. megadetector-10.0.13.dist-info/RECORD +147 -0
  145. megadetector-10.0.13.dist-info/WHEEL +5 -0
  146. megadetector-10.0.13.dist-info/licenses/LICENSE +19 -0
  147. megadetector-10.0.13.dist-info/top_level.txt +1 -0
@@ -0,0 +1,964 @@
+ """
+
+ read_exif.py
+
+ Given a folder of images, reads relevant metadata (EXIF/IPTC/XMP) fields from all images,
+ and writes them to a .json or .csv file.
+
+ This module can use either PIL (which can only reliably read EXIF data) or exiftool (which
+ can read everything). The latter approach expects that exiftool is available on the system
+ path. No attempt is made to be consistent in format across the two approaches.
+
+ """
+
+ #%% Imports and constants
+
+ import os
+ import subprocess
+ import json
+ import argparse
+ import sys
+
+ from datetime import datetime
+
+ from multiprocessing.pool import ThreadPool as ThreadPool
+ from multiprocessing.pool import Pool as Pool
+
+ from tqdm import tqdm
+ from PIL import Image, ExifTags
+ from functools import partial
+
+ from megadetector.utils.path_utils import find_images, is_executable
+ from megadetector.utils.ct_utils import args_to_object
+ from megadetector.utils.ct_utils import image_file_to_camera_folder
+ from megadetector.data_management.cct_json_utils import write_object_with_serialized_datetimes
+
+ debug_max_images = None
+
+ minimal_exif_tags = \
+     ['DateTime','Model','Make','ExifImageWidth','ExifImageHeight',
+      'DateTimeOriginal','Orientation', 'GPSInfo']
+
+
+ #%% Options
+
+ class ReadExifOptions:
+     """
+     Parameters controlling metadata extraction.
+     """
+
+     def __init__(self):
+
+         #: Enable additional debug console output
+         self.verbose = False
+
+         #: If this is True and an output file is specified for read_exif_from_folder,
+         #: and we encounter a serialization issue, we'll return the results but won't
+         #: error.
+         self.allow_write_error = False
+
+         #: Number of concurrent workers, set to <= 1 to disable parallelization
+         self.n_workers = 1
+
+         #: Should we use threads (vs. processes) for parallelization?
+         #:
+         #: Not relevant if n_workers is <= 1.
+         self.use_threads = True
+
+         #: "File" and "ExifTool" are tag types used by ExifTool to report data that
+         #: doesn't come from EXIF, rather from the file (e.g. file size).
+         self.tag_types_to_ignore = set(['File','ExifTool'])
+
+         #: Include/exclude specific tags (tags_to_include and tags_to_exclude are mutually incompatible)
+         #:
+         #: A useful set of tags one might want to limit queries for:
+         #:
+         #: options.tags_to_include = minimal_exif_tags
+         self.tags_to_include = None
+
+         #: Include/exclude specific tags (tags_to_include and tags_to_exclude are mutually incompatible)
+         self.tags_to_exclude = None
+
+         #: The command line to invoke if using exiftool, can be an absolute path to exiftool.exe, or
+         #: can be just "exiftool", in which case it should be on your system path.
+         self.exiftool_command_name = 'exiftool'
+
+         #: How should we handle byte-formatted EXIF tags?
+         #:
+         #: 'convert_to_string': convert to a Python string
+         #: 'delete': don't include at all
+         #: 'raw': include as a byte string
+         self.byte_handling = 'convert_to_string' # 'convert_to_string','delete','raw'
+
+         #: Should we use exiftool or PIL?
+         self.processing_library = 'pil' # 'exiftool','pil'
+
+
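For illustration only (this sketch is not part of the package diff), a caller that restricts extraction to the minimal tag set defined above and enables parallel reads might configure the options like this; the import path follows the package layout in the file list:

    from megadetector.data_management.read_exif import ReadExifOptions, minimal_exif_tags

    options = ReadExifOptions()
    options.processing_library = 'pil'           # or 'exiftool', if it's on the system path
    options.n_workers = 8                        # read EXIF data with 8 workers
    options.use_threads = True                   # threads rather than processes
    options.tags_to_include = minimal_exif_tags  # only keep the small set of common tags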
+ class ExifResultsToCCTOptions:
+     """
+     Options controlling the behavior of exif_results_to_cct() (which reformats the datetime
+     information extracted by read_exif_from_folder()).
+     """
+
+     def __init__(self):
+
+         #: Timestamps older than this are assumed to be junk; lots of cameras use a
+         #: default time in 2000.
+         self.min_valid_timestamp_year = 2001
+
+         #: The EXIF tag from which to pull datetime information
+         self.exif_datetime_tag = 'DateTimeOriginal'
+
+         #: Function for extracting location information, should take a string
+         #: and return a string. Defaults to ct_utils.image_file_to_camera_folder. If
+         #: this is None, location is written as "unknown".
+         self.filename_to_location_function = image_file_to_camera_folder
+
+
+ #%% Functions
+
+ def _get_exif_ifd(exif):
+     """
+     Read EXIF data by finding the EXIF offset and reading tags directly
+
+     https://github.com/python-pillow/Pillow/issues/5863
+     """
+
+     # Find the offset for all the EXIF information
+     for key, value in ExifTags.TAGS.items():
+         if value == "ExifOffset":
+             break
+     info = exif.get_ifd(key)
+     return {
+         ExifTags.TAGS.get(key, key): value
+         for key, value in info.items()
+     }
+
+
+ def has_gps_info(im):
+     """
+     Given a filename, PIL image, dict of EXIF tags, or dict containing an 'exif_tags' field,
+     determine whether GPS location information is present in this image. Does not retrieve
+     location info, currently only used to determine whether it's present.
+
+     Args:
+         im (str, PIL.Image.Image, dict): image for which we should determine GPS metadata
+             presence
+
+     Returns:
+         bool: whether GPS metadata is present, or None if we failed to read EXIF data from
+         a file.
+     """
+
+     if isinstance(im,str) or isinstance(im,Image.Image):
+         exif_tags = read_pil_exif(im)
+         if exif_tags is None:
+             return None
+         assert isinstance(exif_tags,dict)
+     else:
+         assert isinstance(im,dict)
+         exif_tags = im
+
+     if 'exif_tags' in exif_tags:
+         exif_tags = exif_tags['exif_tags']
+         if exif_tags is None:
+             return None
+
+     if 'GPSInfo' in exif_tags and \
+        exif_tags['GPSInfo'] is not None and \
+        isinstance(exif_tags['GPSInfo'],dict):
+
+         # Don't indicate that GPS data is present if only GPS version info is present
+         if ('GPSLongitude' in exif_tags['GPSInfo']) or ('GPSLatitude' in exif_tags['GPSInfo']):
+             return True
+         return False
+
+     return False
+
+ # ...def has_gps_info(...)
+
+
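A minimal usage sketch for has_gps_info(), assuming a hypothetical image path:

    from megadetector.data_management.read_exif import has_gps_info

    # True/False, or None if EXIF data could not be read from the file
    print(has_gps_info('/data/camera-traps/cam01/IMG_0001.JPG'))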
+ def read_pil_exif(im,options=None):
+     """
+     Read all the EXIF data we know how to read from an image, using PIL. This is primarily
+     an internal function; the main entry point for single-image EXIF information is
+     read_exif_tags_for_image().
+
+     Args:
+         im (str or PIL.Image.Image): image (as a filename or an Image object) from which
+             we should read EXIF data.
+         options (ReadExifOptions, optional): see ReadExifOptions
+
+     Returns:
+         dict: a dictionary mapping EXIF tag names to their values
+     """
+
+     if options is None:
+         options = ReadExifOptions()
+
+     image_name = '[image]'
+     if isinstance(im,str):
+         image_name = im
+         im = Image.open(im)
+
+     exif_tags = {}
+     try:
+         exif_info = im.getexif()
+     except Exception:
+         exif_info = None
+
+     if exif_info is None:
+         return exif_tags
+
+     # Read all standard EXIF tags; if necessary, we'll filter later to a restricted
+     # list of tags.
+     for k, v in exif_info.items():
+         assert isinstance(k,str) or isinstance(k,int), \
+             'Invalid EXIF key {}'.format(str(k))
+         if k in ExifTags.TAGS:
+             exif_tags[ExifTags.TAGS[k]] = str(v)
+         else:
+             # print('Warning: unrecognized EXIF tag: {}'.format(k))
+             exif_tags[k] = str(v)
+
+     exif_ifd_tags = _get_exif_ifd(exif_info)
+
+     # Read tags that are only available via offset
+     for k in exif_ifd_tags.keys():
+         v = exif_ifd_tags[k]
+         if k in exif_tags:
+             if options.verbose:
+                 print('Warning: redundant EXIF values for {} in {}:\n{}\n{}'.format(
+                     k,image_name,exif_tags[k],v))
+         else:
+             exif_tags[k] = v
+
+     exif_tag_names = list(exif_tags.keys())
+
+     # Type conversion and cleanup
+     #
+     # Most quirky types will get serialized to string when we write to .json.
+     for k in exif_tag_names:
+
+         if isinstance(exif_tags[k],bytes):
+
+             if options.byte_handling == 'delete':
+                 del exif_tags[k]
+             elif options.byte_handling == 'raw':
+                 pass
+             else:
+                 assert options.byte_handling == 'convert_to_string'
+                 exif_tags[k] = str(exif_tags[k])
+
+         elif isinstance(exif_tags[k],str):
+
+             exif_tags[k] = exif_tags[k].strip()
+
+     # Special case for GPS info... I could decode other encoded tags, but GPS info is
+     # particularly important, so I'm only doing that for now.
+     if 'GPSInfo' in exif_tags:
+
+         try:
+
+             # Find the tag number for GPS info, in practice should always be 34853
+             gpsinfo_tag = next(tag for tag, name in ExifTags.TAGS.items() if name == "GPSInfo")
+             assert gpsinfo_tag == 34853
+
+             # These are integer keys, e.g. {7: (14.0, 27.0, 7.24)}
+             gps_info_raw = exif_info.get_ifd(gpsinfo_tag)
+
+             # Convert to strings, e.g. 'GPSTimeStamp'
+             gps_info = {}
+             for int_tag,v in gps_info_raw.items():
+                 assert isinstance(int_tag,int)
+                 if int_tag in ExifTags.GPSTAGS:
+                     gps_info[ExifTags.GPSTAGS[int_tag]] = v
+                 else:
+                     gps_info[int_tag] = v
+
+             exif_tags['GPSInfo'] = gps_info
+
+         except Exception as e:
+
+             if options.verbose:
+                 print('Warning: error reading GPS info: {}'.format(str(e)))
+
+     # ...if we think there might be GPS tags in this image
+
+     # Filter tags if necessary
+     exif_tags = _filter_tags(exif_tags,options)
+
+     return exif_tags
+
+ # ...read_pil_exif()
+
+
+ def format_datetime_as_exif_datetime_string(dt):
+     """
+     Returns a Python datetime object rendered using the standard EXIF datetime
+     string format ('%Y:%m:%d %H:%M:%S')
+
+     Args:
+         dt (datetime): datetime object to format
+
+     Returns:
+         str: [dt] as a string in standard EXIF format
+     """
+
+     return datetime.strftime(dt, '%Y:%m:%d %H:%M:%S')
+
+
+ def parse_exif_datetime_string(s,verbose=False):
+     """
+     EXIF datetimes are strings, but in a standard format:
+
+     %Y:%m:%d %H:%M:%S
+
+     Parses one of those strings into a Python datetime object.
+
+     Args:
+         s (str): datetime string to parse, should be in standard EXIF datetime format
+         verbose (bool, optional): enable additional debug output
+
+     Returns:
+         datetime: the datetime object created from [s]
+     """
+
+     dt = None
+     try:
+         dt = datetime.strptime(s, '%Y:%m:%d %H:%M:%S')
+     except Exception:
+         if verbose:
+             print('Warning: could not parse datetime {}'.format(str(s)))
+     return dt
+
+
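The two helpers above are inverses over the standard EXIF datetime format; a small illustrative round trip:

    from megadetector.data_management.read_exif import (
        parse_exif_datetime_string, format_datetime_as_exif_datetime_string)

    dt = parse_exif_datetime_string('2023:06:01 14:27:07')   # -> datetime(2023, 6, 1, 14, 27, 7)
    s = format_datetime_as_exif_datetime_string(dt)
    assert s == '2023:06:01 14:27:07'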
+ def _filter_tags(tags,options):
+     """
+     Internal function used to include/exclude specific tags from the exif_tags
+     dict.
+     """
+
+     ## No-op cases
+
+     if options is None:
+         return tags
+
+     if (options.tags_to_include is None) and (options.tags_to_exclude is None):
+         return tags
+
+     ## If we're including specific tags
+
+     if options.tags_to_include is not None:
+
+         assert options.tags_to_exclude is None, \
+             "tags_to_include and tags_to_exclude are incompatible"
+
+         tags_to_include = options.tags_to_include
+
+         if isinstance(tags_to_include,str):
+
+             # Special case: return all tags
+             if tags_to_include == 'all':
+                 return tags
+
+             # Otherwise convert string to list
+             tags_to_include = tags_to_include.split(',')
+
+         # Case-insensitive matching
+         tags_to_include = [s.lower().strip() for s in tags_to_include]
+
+         tags_to_return = {}
+
+         for tag_name in tags.keys():
+             if str(tag_name).strip().lower() in tags_to_include:
+                 tags_to_return[tag_name] = tags[tag_name]
+
+         return tags_to_return
+
+     ## If we're excluding specific tags
+
+     if options.tags_to_exclude is not None:
+
+         assert options.tags_to_include is None, \
+             "tags_to_include and tags_to_exclude are incompatible"
+
+         tags_to_exclude = options.tags_to_exclude
+
+         # Convert string to list
+         if isinstance(tags_to_exclude,str):
+             tags_to_exclude = tags_to_exclude.split(',')
+
+         # Case-insensitive matching
+         tags_to_exclude = [s.lower().strip() for s in tags_to_exclude]
+
+         tags_to_return = {}
+         for tag_name in tags.keys():
+             if str(tag_name).strip().lower() not in tags_to_exclude:
+                 tags_to_return[tag_name] = tags[tag_name]
+
+         return tags_to_return
+
+ # ...def _filter_tags(...)
+
+
+ def read_exif_tags_for_image(file_path,options=None):
+     """
+     Get relevant fields from EXIF data for an image
+
+     Args:
+         file_path (str): image from which we should read EXIF data
+         options (ReadExifOptions, optional): see ReadExifOptions
+
+     Returns:
+         dict: a dict with fields 'status' (str) and 'tags'. The exact format of 'tags' depends on
+         options.processing_library:
+
+         - For exiftool, 'tags' is a list of lists, where each element is (type/tag/value)
+         - For PIL, 'tags' is a dict (str:str)
+     """
+
+     if options is None:
+         options = ReadExifOptions()
+
+     result = {'status':'unknown','tags':[]}
+
+     if options.processing_library == 'pil':
+
+         try:
+             exif_tags = read_pil_exif(file_path,options)
+
+         except Exception as e:
+             if options.verbose:
+                 print('PIL EXIF read failure for image {}: {}'.format(
+                     file_path,str(e)))
+                 import traceback
+                 traceback.print_exc()
+             result['status'] = 'read_failure'
+             result['error'] = str(e)
+
+         if result['status'] == 'unknown':
+             if exif_tags is None:
+                 result['status'] = 'empty_read'
+             else:
+                 result['status'] = 'success'
+                 result['tags'] = _filter_tags(exif_tags,options)
+
+         return result
+
+     elif options.processing_library == 'exiftool':
+
+         # -G means "Print group name for each tag", e.g. print:
+         #
+         # [File] Bits Per Sample : 8
+         #
+         # ...instead of:
+         #
+         # Bits Per Sample : 8
+         proc = subprocess.Popen([options.exiftool_command_name, '-G', file_path],
+                                 stdout=subprocess.PIPE, encoding='utf8')
+
+         exif_lines = proc.stdout.readlines()
+         exif_lines = [s.strip() for s in exif_lines]
+         if ( (exif_lines is None) or (len(exif_lines) == 0) or not \
+             any([s.lower().startswith('[exif]') for s in exif_lines])):
+             result['status'] = 'failure'
+             return result
+
+         # A list of three-element lists (type/tag/value)
+         exif_tags = []
+
+         # line_raw = exif_lines[0]
+         for line_raw in exif_lines:
+
+             # A typical line:
+             #
+             # [ExifTool] ExifTool Version Number : 12.13
+
+             line = line_raw.strip()
+
+             # Split on the first occurrence of ":"
+             tokens = line.split(':',1)
+             assert(len(tokens) == 2), 'EXIF tokenization failure ({} tokens, expected 2)'.format(
+                 len(tokens))
+
+             field_value = tokens[1].strip()
+
+             field_name_type = tokens[0].strip()
+             field_name_type_tokens = field_name_type.split(None,1)
+             assert len(field_name_type_tokens) == 2, 'EXIF tokenization failure'
+
+             field_type = field_name_type_tokens[0].strip()
+             assert field_type.startswith('[') and field_type.endswith(']'), \
+                 'Invalid EXIF field {}'.format(field_type)
+             field_type = field_type[1:-1]
+
+             if field_type in options.tag_types_to_ignore:
+                 if options.verbose:
+                     print('Ignoring tag with type {}'.format(field_type))
+                 continue
+
+             field_name = field_name_type_tokens[1].strip()
+             if options.tags_to_exclude is not None and field_name in options.tags_to_exclude:
+                 continue
+             if options.tags_to_include is not None and field_name not in options.tags_to_include:
+                 continue
+             tag = [field_type,field_name,field_value]
+
+             exif_tags.append(tag)
+
+         # ...for each output line
+
+         result['status'] = 'success'
+         result['tags'] = exif_tags
+         return result
+
+     else:
+
+         raise ValueError('Unknown processing library {}'.format(
+             options.processing_library))
+
+     # ...which processing library are we using?
+
+ # ...read_exif_tags_for_image()
+
+
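To make the two return formats concrete, here is an illustrative sketch (not part of the package) comparing the PIL and exiftool paths; the image path is hypothetical, and the exiftool path assumes exiftool is on the system path:

    from megadetector.data_management.read_exif import ReadExifOptions, read_exif_tags_for_image

    options = ReadExifOptions()

    options.processing_library = 'pil'
    r_pil = read_exif_tags_for_image('/data/cam01/IMG_0001.JPG', options)
    # r_pil['tags'] is a dict mapping tag names to (string) values

    options.processing_library = 'exiftool'
    r_et = read_exif_tags_for_image('/data/cam01/IMG_0001.JPG', options)
    # r_et['tags'] is a list of [group, tag name, value] lists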
+ def _populate_exif_data(im, image_base, options=None):
+     """
+     Populate EXIF data into the 'exif_tags' field in the image object [im].
+
+     im['file_name'] should be prepopulated, relative to image_base.
+
+     Returns a modified version of [im], also modifies [im] in place.
+     """
+
+     if options is None:
+         options = ReadExifOptions()
+
+     fn = im['file_name']
+     if options.verbose:
+         print('Processing {}'.format(fn))
+
+     try:
+
+         file_path = os.path.join(image_base,fn)
+         assert os.path.isfile(file_path), 'Could not find file {}'.format(file_path)
+         result = read_exif_tags_for_image(file_path,options)
+         if result['status'] == 'success':
+             exif_tags = result['tags']
+             im['exif_tags'] = exif_tags
+         else:
+             im['exif_tags'] = None
+             im['status'] = result['status']
+             if 'error' in result:
+                 im['error'] = result['error']
+             if options.verbose:
+                 print('Error reading EXIF data for {}'.format(file_path))
+
+     except Exception as e:
+
+         s = 'Error on {}: {}'.format(fn,str(e))
+         print(s)
+         im['error'] = s
+         im['status'] = 'read failure'
+         im['exif_tags'] = None
+
+     return im
+
+ # ..._populate_exif_data()
+
+
+ def _create_image_objects(image_files,recursive=True):
+     """
+     Create empty image objects for every image in [image_files], which can be a
+     list of relative paths (which will get stored without processing, so the base
+     path doesn't matter here), or a folder name.
+
+     Returns a list of dicts with field 'file_name' (a relative path).
+
+     "recursive" is ignored if "image_files" is a list.
+     """
+
+     # Enumerate *relative* paths
+     if isinstance(image_files,str):
+         print('Enumerating image files in {}'.format(image_files))
+         assert os.path.isdir(image_files), 'Invalid image folder {}'.format(image_files)
+         image_files = find_images(image_files,
+                                   recursive=recursive,
+                                   return_relative_paths=True,
+                                   convert_slashes=True)
+
+     images = []
+     for fn in image_files:
+         im = {}
+         im['file_name'] = fn
+         images.append(im)
+
+     if debug_max_images is not None:
+         print('Trimming input list to {} images'.format(debug_max_images))
+         images = images[0:debug_max_images]
+
+     return images
+
+
+ def _populate_exif_for_images(image_base,images,options=None):
+     """
+     Main worker loop: read EXIF data for each image object in [images] and
+     populate the image objects in place.
+
+     'images' should be a list of dicts with the field 'file_name' containing
+     a relative path (relative to 'image_base').
+     """
+
+     if options is None:
+         options = ReadExifOptions()
+
+     if options.n_workers == 1:
+
+         results = []
+         for im in tqdm(images):
+             results.append(_populate_exif_data(im,image_base,options))
+
+     else:
+
+         pool = None
+         try:
+             if options.use_threads:
+                 print('Starting parallel thread pool with {} workers'.format(options.n_workers))
+                 pool = ThreadPool(options.n_workers)
+             else:
+                 print('Starting parallel process pool with {} workers'.format(options.n_workers))
+                 pool = Pool(options.n_workers)
+
+             results = list(tqdm(pool.imap(partial(_populate_exif_data,image_base=image_base,
+                                                   options=options),images),total=len(images)))
+         finally:
+             if pool is not None:
+                 pool.close()
+                 pool.join()
+                 print('Pool closed and joined for EXIF extraction')
+
+     return results
+
+
+ def _write_exif_results(results,output_file):
+     """
+     Write EXIF information to [output_file].
+
+     'results' is a list of dicts with fields 'exif_tags' and 'file_name'.
+
+     Writes to .csv or .json depending on the extension of 'output_file'.
+     """
+
+     if output_file.endswith('.json'):
+
+         with open(output_file,'w') as f:
+             json.dump(results,f,indent=1,default=str)
+
+     elif output_file.endswith('.csv'):
+
+         # Find all EXIF tags that exist in any image
+         all_keys = set()
+         for im in results:
+
+             keys_this_image = set()
+             exif_tags = im['exif_tags']
+             file_name = im['file_name']
+             for tag in exif_tags:
+                 tag_name = tag[1]
+                 assert tag_name not in keys_this_image, \
+                     'Error: tag {} appears twice in image {}'.format(
+                         tag_name,file_name)
+                 all_keys.add(tag_name)
+
+             # ...for each tag in this image
+
+         # ...for each image
+
+         all_keys = sorted(list(all_keys))
+
+         header = ['File Name']
+         header.extend(all_keys)
+
+         import csv
+         with open(output_file,'w') as csvfile:
+
+             writer = csv.writer(csvfile)
+
+             # Write header
+             writer.writerow(header)
+
+             for im in results:
+
+                 row = [im['file_name']]
+                 kvp_this_image = {tag[1]:tag[2] for tag in im['exif_tags']}
+
+                 for i_key,key in enumerate(all_keys):
+                     value = ''
+                     if key in kvp_this_image:
+                         value = kvp_this_image[key]
+                     row.append(value)
+                 # ...for each key that *might* be present in this image
+
+                 assert len(row) == len(header)
+
+                 writer.writerow(row)
+
+             # ...for each image
+
+         # ...with open()
+
+     else:
+
+         raise ValueError('Could not determine output type from file {}'.format(
+             output_file))
+
+     # ...if we're writing to .json/.csv
+
+     print('Wrote results to {}'.format(output_file))
+
+ # ..._write_exif_results(...)
+
+
+ def read_exif_from_folder(input_folder,
+                           output_file=None,
+                           options=None,
+                           filenames=None,
+                           recursive=True):
+     """
+     Read EXIF data for a folder of images.
+
+     Args:
+         input_folder (str): folder to process; if this is None, [filenames] should be a list of absolute
+             paths
+         output_file (str, optional): .json or .csv file to which we should write results; if this is None,
+             results are returned but not written to disk
+         options (ReadExifOptions, optional): parameters controlling metadata extraction
+         filenames (list, optional): allowlist of relative filenames (if [input_folder] is not None) or
+             a list of absolute filenames (if [input_folder] is None)
+         recursive (bool, optional): whether to recurse into [input_folder], not relevant if [input_folder]
+             is None.
+
+     Returns:
+         list: list of dicts, each of which contains EXIF information for one image. Fields include at least:
+             * 'file_name': the relative path to the image
+             * 'exif_tags': a dict of EXIF tags whose exact format depends on [options.processing_library].
+             * 'status' and 'error': only populated when reading EXIF information for an image fails
+     """
+
+     if options is None:
+         options = ReadExifOptions()
+
+     # Validate options
+     if options.tags_to_include is not None:
+         assert options.tags_to_exclude is None, \
+             "tags_to_include and tags_to_exclude are incompatible"
+     if options.tags_to_exclude is not None:
+         assert options.tags_to_include is None, \
+             "tags_to_include and tags_to_exclude are incompatible"
+
+     if input_folder is None:
+         input_folder = ''
+     if len(input_folder) > 0:
+         assert os.path.isdir(input_folder), \
+             '{} is not a valid folder'.format(input_folder)
+
+     assert (len(input_folder) > 0) or (filenames is not None), \
+         'Must specify either a folder or a list of files'
+
+     if output_file is not None:
+
+         assert output_file.lower().endswith('.json') or output_file.lower().endswith('.csv'), \
+             'I only know how to write results to .json or .csv'
+
+         try:
+             with open(output_file, 'a') as f:
+                 if not f.writable():
+                     raise OSError('File not writable')
+         except Exception:
+             print('Could not write to file {}'.format(output_file))
+             raise
+
+     if options.processing_library == 'exiftool':
+         assert is_executable(options.exiftool_command_name), 'exiftool not available'
+
+     if filenames is None:
+         images = _create_image_objects(input_folder,recursive=recursive)
+     else:
+         assert isinstance(filenames,list)
+         images = _create_image_objects(filenames)
+
+     results = _populate_exif_for_images(input_folder,images,options)
+
+     if output_file is not None:
+         try:
+             _write_exif_results(results,output_file)
+         except Exception as e:
+             if not options.allow_write_error:
+                 raise
+             else:
+                 print('Warning: error serializing EXIF data: {}'.format(str(e)))
+
+     return results
+
+ # ...read_exif_from_folder(...)
+
+
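A short usage sketch for the folder-level entry point (paths are hypothetical); with no options object, the default PIL backend and a single worker are used:

    from megadetector.data_management.read_exif import read_exif_from_folder

    results = read_exif_from_folder('/data/camera-traps',
                                    output_file='/data/camera-traps-exif.json')
    for im in results[:5]:
        exif_tags = im['exif_tags'] or {}
        print(im['file_name'], exif_tags.get('DateTimeOriginal'))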
+ def exif_results_to_cct(exif_results,cct_output_file=None,options=None):
+     """
+     Given the EXIF results for a folder of images read via read_exif_from_folder,
+     create a COCO Camera Traps .json file that has no annotations, but
+     attaches image filenames to locations and datetimes.
+
+     Args:
+         exif_results (str or list): the filename (or loaded list) containing the results
+             from read_exif_from_folder
+         cct_output_file (str, optional): the filename to which we should write
+             COCO-Camera-Traps-formatted data
+         options (ExifResultsToCCTOptions, optional): options guiding the generation
+             of the CCT file, particularly location mapping
+
+     Returns:
+         dict: a COCO Camera Traps dict (with no annotations).
+     """
+
+     if options is None:
+         options = ExifResultsToCCTOptions()
+
+     if isinstance(exif_results,str):
+         print('Reading EXIF results from {}'.format(exif_results))
+         with open(exif_results,'r') as f:
+             exif_results = json.load(f)
+     else:
+         assert isinstance(exif_results,list)
+
+     now = datetime.now()
+
+     image_info = []
+
+     images_without_datetime = []
+     images_with_invalid_datetime = []
+
+     # exif_result = exif_results[0]
+     for exif_result in tqdm(exif_results):
+
+         im = {}
+
+         # By default we assume that each leaf-node folder is a location
+         if options.filename_to_location_function is None:
+             im['location'] = 'unknown'
+         else:
+             im['location'] = options.filename_to_location_function(exif_result['file_name'])
+
+         im['file_name'] = exif_result['file_name']
+         im['id'] = im['file_name']
+
+         if ('exif_tags' not in exif_result) or (exif_result['exif_tags'] is None) or \
+            (options.exif_datetime_tag not in exif_result['exif_tags']):
+             exif_dt = None
+         else:
+             exif_dt = exif_result['exif_tags'][options.exif_datetime_tag]
+             exif_dt = parse_exif_datetime_string(exif_dt)
+         if exif_dt is None:
+             im['datetime'] = None
+             images_without_datetime.append(im['file_name'])
+         else:
+             dt = exif_dt
+
+             # An image from the future (or within the last 24 hours) is invalid
+             if (now - dt).total_seconds() <= 1*24*60*60:
+                 print('Warning: datetime for {} is {}'.format(
+                     im['file_name'],dt))
+                 im['datetime'] = None
+                 images_with_invalid_datetime.append(im['file_name'])
+
+             # An image from before the dawn of time is also invalid
+             elif dt.year < options.min_valid_timestamp_year:
+                 print('Warning: datetime for {} is {}'.format(
+                     im['file_name'],dt))
+                 im['datetime'] = None
+                 images_with_invalid_datetime.append(im['file_name'])
+
+             else:
+                 im['datetime'] = dt
+
+         image_info.append(im)
+
+     # ...for each exif image result
+
+     print('Parsed EXIF datetime information, unable to parse EXIF date from {} of {} images'.format(
+         len(images_without_datetime),len(exif_results)))
+
+     d = {}
+     d['info'] = {}
+     d['images'] = image_info
+     d['annotations'] = []
+     d['categories'] = []
+
+     if cct_output_file is not None:
+         write_object_with_serialized_datetimes(d,cct_output_file)
+
+     return d
+
+ # ...exif_results_to_cct(...)
+
+
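Chaining the functions above into a no-annotation COCO Camera Traps file might look like this sketch (paths hypothetical):

    from megadetector.data_management.read_exif import (
        read_exif_from_folder, exif_results_to_cct, ExifResultsToCCTOptions)

    exif_results = read_exif_from_folder('/data/camera-traps',
                                         output_file='/data/camera-traps-exif.json')
    cct_options = ExifResultsToCCTOptions()
    cct_options.min_valid_timestamp_year = 2015   # treat anything earlier as a bogus camera clock
    cct = exif_results_to_cct(exif_results,
                              cct_output_file='/data/camera-traps-cct.json',
                              options=cct_options)
    print(len(cct['images']))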
+ #%% Interactive driver
+
+ if False:
+
+     #%%
+
+     input_folder = r'C:\temp\md-name-testing'
+     output_file = None # r'C:\temp\md-name-testing\exif.json'
+     options = ReadExifOptions()
+     options.verbose = False
+     options.n_workers = 10
+     options.use_threads = False
+     options.processing_library = 'pil'
+     # options.processing_library = 'exiftool'
+     options.tags_to_include = ['DateTime','Model','Make','ExifImageWidth',
+                                'ExifImageHeight','DateTime','DateTimeOriginal','Orientation']
+     # options.tags_to_exclude = ['MakerNote']
+
+     results = read_exif_from_folder(input_folder,output_file,options)
+
+     #%%
+
+     with open(output_file,'r') as f:
+         d = json.load(f)
+
+
+ #%% Command-line driver
+
+ def main(): # noqa
+
+     options = ReadExifOptions()
+
+     parser = argparse.ArgumentParser(description=('Read EXIF information from all images in' + \
+         ' a folder, and write the results to .csv or .json'))
+
+     parser.add_argument('input_folder', type=str,
+                         help='Folder of images from which we should read EXIF information')
+     parser.add_argument('output_file', type=str,
+                         help='Output file (.json or .csv) to which we should write EXIF information')
+     parser.add_argument('--n_workers', type=int, default=1,
+                         help='Number of concurrent workers to use (defaults to 1)')
+     parser.add_argument('--use_threads', action='store_true',
+                         help='Use threads (instead of processes) for multitasking')
+     parser.add_argument('--processing_library', type=str, default=options.processing_library,
+                         help='Processing library (exiftool or pil)')
+
+     if len(sys.argv[1:]) == 0:
+         parser.print_help()
+         parser.exit()
+
+     args = parser.parse_args()
+     args_to_object(args, options)
+     options.processing_library = options.processing_library.lower()
+
+     read_exif_from_folder(args.input_folder,args.output_file,options)
+
+ if __name__ == '__main__':
+     main()
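Assuming the wheel is installed, the command-line driver can presumably be invoked as a module; a hypothetical invocation:

    python -m megadetector.data_management.read_exif /data/camera-traps /data/camera-traps-exif.json \
        --n_workers 8 --use_threads --processing_library pil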