megadetector 5.0.11__py3-none-any.whl → 5.0.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic; see the registry page for more details.

Files changed (203)
  1. megadetector/api/__init__.py +0 -0
  2. megadetector/api/batch_processing/__init__.py +0 -0
  3. megadetector/api/batch_processing/api_core/__init__.py +0 -0
  4. megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. megadetector/api/batch_processing/api_core/batch_service/score.py +439 -0
  6. megadetector/api/batch_processing/api_core/server.py +294 -0
  7. megadetector/api/batch_processing/api_core/server_api_config.py +97 -0
  8. megadetector/api/batch_processing/api_core/server_app_config.py +55 -0
  9. megadetector/api/batch_processing/api_core/server_batch_job_manager.py +220 -0
  10. megadetector/api/batch_processing/api_core/server_job_status_table.py +149 -0
  11. megadetector/api/batch_processing/api_core/server_orchestration.py +360 -0
  12. megadetector/api/batch_processing/api_core/server_utils.py +88 -0
  13. megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
  14. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +46 -0
  15. megadetector/api/batch_processing/api_support/__init__.py +0 -0
  16. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +152 -0
  17. megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
  18. megadetector/api/batch_processing/integration/digiKam/setup.py +6 -0
  19. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +465 -0
  20. megadetector/api/batch_processing/integration/eMammal/test_scripts/config_template.py +5 -0
  21. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +125 -0
  22. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +55 -0
  23. megadetector/api/synchronous/__init__.py +0 -0
  24. megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  25. megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +152 -0
  26. megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +263 -0
  27. megadetector/api/synchronous/api_core/animal_detection_api/config.py +35 -0
  28. megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
  29. megadetector/api/synchronous/api_core/tests/load_test.py +110 -0
  30. megadetector/classification/__init__.py +0 -0
  31. megadetector/classification/aggregate_classifier_probs.py +108 -0
  32. megadetector/classification/analyze_failed_images.py +227 -0
  33. megadetector/classification/cache_batchapi_outputs.py +198 -0
  34. megadetector/classification/create_classification_dataset.py +627 -0
  35. megadetector/classification/crop_detections.py +516 -0
  36. megadetector/classification/csv_to_json.py +226 -0
  37. megadetector/classification/detect_and_crop.py +855 -0
  38. megadetector/classification/efficientnet/__init__.py +9 -0
  39. megadetector/classification/efficientnet/model.py +415 -0
  40. megadetector/classification/efficientnet/utils.py +607 -0
  41. megadetector/classification/evaluate_model.py +520 -0
  42. megadetector/classification/identify_mislabeled_candidates.py +152 -0
  43. megadetector/classification/json_to_azcopy_list.py +63 -0
  44. megadetector/classification/json_validator.py +699 -0
  45. megadetector/classification/map_classification_categories.py +276 -0
  46. megadetector/classification/merge_classification_detection_output.py +506 -0
  47. megadetector/classification/prepare_classification_script.py +194 -0
  48. megadetector/classification/prepare_classification_script_mc.py +228 -0
  49. megadetector/classification/run_classifier.py +287 -0
  50. megadetector/classification/save_mislabeled.py +110 -0
  51. megadetector/classification/train_classifier.py +827 -0
  52. megadetector/classification/train_classifier_tf.py +725 -0
  53. megadetector/classification/train_utils.py +323 -0
  54. megadetector/data_management/__init__.py +0 -0
  55. megadetector/data_management/annotations/__init__.py +0 -0
  56. megadetector/data_management/annotations/annotation_constants.py +34 -0
  57. megadetector/data_management/camtrap_dp_to_coco.py +237 -0
  58. megadetector/data_management/cct_json_utils.py +404 -0
  59. megadetector/data_management/cct_to_md.py +176 -0
  60. megadetector/data_management/cct_to_wi.py +289 -0
  61. megadetector/data_management/coco_to_labelme.py +283 -0
  62. megadetector/data_management/coco_to_yolo.py +662 -0
  63. megadetector/data_management/databases/__init__.py +0 -0
  64. megadetector/data_management/databases/add_width_and_height_to_db.py +33 -0
  65. megadetector/data_management/databases/combine_coco_camera_traps_files.py +206 -0
  66. megadetector/data_management/databases/integrity_check_json_db.py +493 -0
  67. megadetector/data_management/databases/subset_json_db.py +115 -0
  68. megadetector/data_management/generate_crops_from_cct.py +149 -0
  69. megadetector/data_management/get_image_sizes.py +189 -0
  70. megadetector/data_management/importers/add_nacti_sizes.py +52 -0
  71. megadetector/data_management/importers/add_timestamps_to_icct.py +79 -0
  72. megadetector/data_management/importers/animl_results_to_md_results.py +158 -0
  73. megadetector/data_management/importers/auckland_doc_test_to_json.py +373 -0
  74. megadetector/data_management/importers/auckland_doc_to_json.py +201 -0
  75. megadetector/data_management/importers/awc_to_json.py +191 -0
  76. megadetector/data_management/importers/bellevue_to_json.py +273 -0
  77. megadetector/data_management/importers/cacophony-thermal-importer.py +793 -0
  78. megadetector/data_management/importers/carrizo_shrubfree_2018.py +269 -0
  79. megadetector/data_management/importers/carrizo_trail_cam_2017.py +289 -0
  80. megadetector/data_management/importers/cct_field_adjustments.py +58 -0
  81. megadetector/data_management/importers/channel_islands_to_cct.py +913 -0
  82. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +180 -0
  83. megadetector/data_management/importers/eMammal/eMammal_helpers.py +249 -0
  84. megadetector/data_management/importers/eMammal/make_eMammal_json.py +223 -0
  85. megadetector/data_management/importers/ena24_to_json.py +276 -0
  86. megadetector/data_management/importers/filenames_to_json.py +386 -0
  87. megadetector/data_management/importers/helena_to_cct.py +283 -0
  88. megadetector/data_management/importers/idaho-camera-traps.py +1407 -0
  89. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +294 -0
  90. megadetector/data_management/importers/jb_csv_to_json.py +150 -0
  91. megadetector/data_management/importers/mcgill_to_json.py +250 -0
  92. megadetector/data_management/importers/missouri_to_json.py +490 -0
  93. megadetector/data_management/importers/nacti_fieldname_adjustments.py +79 -0
  94. megadetector/data_management/importers/noaa_seals_2019.py +181 -0
  95. megadetector/data_management/importers/pc_to_json.py +365 -0
  96. megadetector/data_management/importers/plot_wni_giraffes.py +123 -0
  97. megadetector/data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -0
  98. megadetector/data_management/importers/prepare_zsl_imerit.py +131 -0
  99. megadetector/data_management/importers/rspb_to_json.py +356 -0
  100. megadetector/data_management/importers/save_the_elephants_survey_A.py +320 -0
  101. megadetector/data_management/importers/save_the_elephants_survey_B.py +329 -0
  102. megadetector/data_management/importers/snapshot_safari_importer.py +758 -0
  103. megadetector/data_management/importers/snapshot_safari_importer_reprise.py +665 -0
  104. megadetector/data_management/importers/snapshot_serengeti_lila.py +1067 -0
  105. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +150 -0
  106. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +153 -0
  107. megadetector/data_management/importers/sulross_get_exif.py +65 -0
  108. megadetector/data_management/importers/timelapse_csv_set_to_json.py +490 -0
  109. megadetector/data_management/importers/ubc_to_json.py +399 -0
  110. megadetector/data_management/importers/umn_to_json.py +507 -0
  111. megadetector/data_management/importers/wellington_to_json.py +263 -0
  112. megadetector/data_management/importers/wi_to_json.py +442 -0
  113. megadetector/data_management/importers/zamba_results_to_md_results.py +181 -0
  114. megadetector/data_management/labelme_to_coco.py +547 -0
  115. megadetector/data_management/labelme_to_yolo.py +272 -0
  116. megadetector/data_management/lila/__init__.py +0 -0
  117. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +97 -0
  118. megadetector/data_management/lila/add_locations_to_nacti.py +147 -0
  119. megadetector/data_management/lila/create_lila_blank_set.py +558 -0
  120. megadetector/data_management/lila/create_lila_test_set.py +152 -0
  121. megadetector/data_management/lila/create_links_to_md_results_files.py +106 -0
  122. megadetector/data_management/lila/download_lila_subset.py +178 -0
  123. megadetector/data_management/lila/generate_lila_per_image_labels.py +516 -0
  124. megadetector/data_management/lila/get_lila_annotation_counts.py +170 -0
  125. megadetector/data_management/lila/get_lila_image_counts.py +112 -0
  126. megadetector/data_management/lila/lila_common.py +300 -0
  127. megadetector/data_management/lila/test_lila_metadata_urls.py +132 -0
  128. megadetector/data_management/ocr_tools.py +870 -0
  129. megadetector/data_management/read_exif.py +809 -0
  130. megadetector/data_management/remap_coco_categories.py +84 -0
  131. megadetector/data_management/remove_exif.py +66 -0
  132. megadetector/data_management/rename_images.py +187 -0
  133. megadetector/data_management/resize_coco_dataset.py +189 -0
  134. megadetector/data_management/wi_download_csv_to_coco.py +247 -0
  135. megadetector/data_management/yolo_output_to_md_output.py +446 -0
  136. megadetector/data_management/yolo_to_coco.py +676 -0
  137. megadetector/detection/__init__.py +0 -0
  138. megadetector/detection/detector_training/__init__.py +0 -0
  139. megadetector/detection/detector_training/model_main_tf2.py +114 -0
  140. megadetector/detection/process_video.py +846 -0
  141. megadetector/detection/pytorch_detector.py +355 -0
  142. megadetector/detection/run_detector.py +779 -0
  143. megadetector/detection/run_detector_batch.py +1219 -0
  144. megadetector/detection/run_inference_with_yolov5_val.py +1087 -0
  145. megadetector/detection/run_tiled_inference.py +934 -0
  146. megadetector/detection/tf_detector.py +192 -0
  147. megadetector/detection/video_utils.py +698 -0
  148. megadetector/postprocessing/__init__.py +0 -0
  149. megadetector/postprocessing/add_max_conf.py +64 -0
  150. megadetector/postprocessing/categorize_detections_by_size.py +165 -0
  151. megadetector/postprocessing/classification_postprocessing.py +716 -0
  152. megadetector/postprocessing/combine_api_outputs.py +249 -0
  153. megadetector/postprocessing/compare_batch_results.py +966 -0
  154. megadetector/postprocessing/convert_output_format.py +396 -0
  155. megadetector/postprocessing/load_api_results.py +195 -0
  156. megadetector/postprocessing/md_to_coco.py +310 -0
  157. megadetector/postprocessing/md_to_labelme.py +330 -0
  158. megadetector/postprocessing/merge_detections.py +412 -0
  159. megadetector/postprocessing/postprocess_batch_results.py +1908 -0
  160. megadetector/postprocessing/remap_detection_categories.py +170 -0
  161. megadetector/postprocessing/render_detection_confusion_matrix.py +660 -0
  162. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +211 -0
  163. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +83 -0
  164. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +1635 -0
  165. megadetector/postprocessing/separate_detections_into_folders.py +730 -0
  166. megadetector/postprocessing/subset_json_detector_output.py +700 -0
  167. megadetector/postprocessing/top_folders_to_bottom.py +223 -0
  168. megadetector/taxonomy_mapping/__init__.py +0 -0
  169. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +491 -0
  170. megadetector/taxonomy_mapping/map_new_lila_datasets.py +150 -0
  171. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -0
  172. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +588 -0
  173. megadetector/taxonomy_mapping/retrieve_sample_image.py +71 -0
  174. megadetector/taxonomy_mapping/simple_image_download.py +219 -0
  175. megadetector/taxonomy_mapping/species_lookup.py +834 -0
  176. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +159 -0
  177. megadetector/taxonomy_mapping/taxonomy_graph.py +346 -0
  178. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +83 -0
  179. megadetector/utils/__init__.py +0 -0
  180. megadetector/utils/azure_utils.py +178 -0
  181. megadetector/utils/ct_utils.py +613 -0
  182. megadetector/utils/directory_listing.py +246 -0
  183. megadetector/utils/md_tests.py +1164 -0
  184. megadetector/utils/path_utils.py +1045 -0
  185. megadetector/utils/process_utils.py +160 -0
  186. megadetector/utils/sas_blob_utils.py +509 -0
  187. megadetector/utils/split_locations_into_train_val.py +228 -0
  188. megadetector/utils/string_utils.py +92 -0
  189. megadetector/utils/url_utils.py +323 -0
  190. megadetector/utils/write_html_image_list.py +225 -0
  191. megadetector/visualization/__init__.py +0 -0
  192. megadetector/visualization/plot_utils.py +293 -0
  193. megadetector/visualization/render_images_with_thumbnails.py +275 -0
  194. megadetector/visualization/visualization_utils.py +1536 -0
  195. megadetector/visualization/visualize_db.py +552 -0
  196. megadetector/visualization/visualize_detector_output.py +405 -0
  197. {megadetector-5.0.11.dist-info → megadetector-5.0.13.dist-info}/LICENSE +0 -0
  198. {megadetector-5.0.11.dist-info → megadetector-5.0.13.dist-info}/METADATA +2 -2
  199. megadetector-5.0.13.dist-info/RECORD +201 -0
  200. megadetector-5.0.13.dist-info/top_level.txt +1 -0
  201. megadetector-5.0.11.dist-info/RECORD +0 -5
  202. megadetector-5.0.11.dist-info/top_level.txt +0 -1
  203. {megadetector-5.0.11.dist-info → megadetector-5.0.13.dist-info}/WHEEL +0 -0
@@ -0,0 +1,809 @@
1
+ """
2
+
3
+ read_exif.py
4
+
5
+ Given a folder of images, reads relevant metadata (EXIF/IPTC/XMP) fields from all images,
6
+ and writes them to a .json or .csv file.
7
+
8
+ This module can use either PIL (which can only reliably read EXIF data) or exiftool (which
9
+ can read everything). The latter approach expects that exiftool is available on the system
10
+ path. No attempt is made to be consistent in format across the two approaches.
11
+
12
+ """
13
+
14
+ #%% Imports and constants
15
+
16
+ import os
17
+ import subprocess
18
+ import json
19
+ from datetime import datetime
20
+
21
+ from multiprocessing.pool import ThreadPool as ThreadPool
22
+ from multiprocessing.pool import Pool as Pool
23
+
24
+ from tqdm import tqdm
25
+ from PIL import Image, ExifTags
26
+
27
+ from megadetector.utils.path_utils import find_images, is_executable
28
+ from megadetector.utils.ct_utils import args_to_object
29
+ from megadetector.utils.ct_utils import image_file_to_camera_folder
30
+
31
+ debug_max_images = None
32
+
33
+
34
+ #%% Options
35
+
36
+ class ReadExifOptions:
37
+ """
38
+ Parameters controlling metadata extraction.
39
+ """
40
+
41
+ def __init__(self):
42
+
43
+ #: Enable additional debug console output
44
+ self.verbose = False
45
+
46
+ #: If this is True and an output file is specified for read_exif_from_folder,
47
+ #: and we encounter a serialization issue, we'll return the results but won't
48
+ #: error.
49
+ self.allow_write_error = False
50
+
51
+ #: Number of concurrent workers, set to <= 1 to disable parallelization
52
+ self.n_workers = 1
53
+
54
+ #: Should we use threads (vs. processes) for parallelization?
55
+ #:
56
+ #: Not relevant if n_workers is <= 1.
57
+ self.use_threads = True
58
+
59
+ #: "File" and "ExifTool" are tag types used by ExifTool to report data that
60
+ #: doesn't come from EXIF, rather from the file (e.g. file size).
61
+ self.tag_types_to_ignore = set(['File','ExifTool'])
62
+
63
+ #: Include/exclude specific tags (tags_to_include and tags_to_exclude are mutually incompatible)
64
+ #:
65
+ #: A useful set of tags one might want to limit queries for:
66
+ #:
67
+ #: options.tags_to_include = ['DateTime','Model','Make','ExifImageWidth','ExifImageHeight','DateTime',
68
+ #: 'DateTimeOriginal','Orientation']
69
+ self.tags_to_include = None
70
+
71
+ #: Include/exclude specific tags (tags_to_include and tags_to_exclude are mutually incompatible)
72
+ self.tags_to_exclude = None
73
+
74
+ #: The command line to invoke if using exiftool, can be an absolute path to exiftool.exe, or
75
+ #: can be just "exiftool", in which case it should be on your system path.
76
+ self.exiftool_command_name = 'exiftool'
77
+
78
+ #: How should we handle byte-formatted EXIF tags?
79
+ #:
80
+ #: 'convert_to_string': convert to a Python string
81
+ #: 'delete': don't include at all
82
+ #: 'raw': include as a byte string
83
+ self.byte_handling = 'convert_to_string' # 'convert_to_string','delete','raw'
84
+
85
+ #: Should we use exiftool or PIL?
86
+ self.processing_library = 'pil' # 'exiftool','pil'
87
+
88
+
89
+ class ExifResultsToCCTOptions:
90
+ """
91
+ Options controlling the behavior of exif_results_to_cct() (which reformats the datetime information)
92
+ extracted by read_exif_from_folder().
93
+ """
94
+
95
+ def __init__(self):
96
+
97
+ #: Timestamps older than this are assumed to be junk; lots of cameras use a
98
+ #: default time in 2000.
99
+ self.min_valid_timestamp_year = 2001
100
+
101
+ #: The EXIF tag from which to pull datetime information
102
+ self.exif_datetime_tag = 'DateTimeOriginal'
103
+
104
+ #: Function for extracting location information, should take a string
105
+ #: and return a string. Defaults to ct_utils.image_file_to_camera_folder. If
106
+ #: this is None, uses folder names as locations.
107
+ self.filename_to_location_function = image_file_to_camera_folder
108
+
109
+
110
+ #%% Functions
111
+
112
+ def _get_exif_ifd(exif):
113
+ """
114
+ Read EXIF data from by finding the EXIF offset and reading tags directly
115
+
116
+ https://github.com/python-pillow/Pillow/issues/5863
117
+ """
118
+
119
+ # Find the offset for all the EXIF information
120
+ for key, value in ExifTags.TAGS.items():
121
+ if value == "ExifOffset":
122
+ break
123
+ info = exif.get_ifd(key)
124
+ return {
125
+ ExifTags.TAGS.get(key, key): value
126
+ for key, value in info.items()
127
+ }
128
+
129
+
130
+ def read_pil_exif(im,options=None):
131
+ """
132
+ Read all the EXIF data we know how to read from an image, using PIL. This is primarily
133
+ an internal function; the main entry point for single-image EXIF information is
134
+ read_exif_tags_for_image().
135
+
136
+ Args:
137
+ im (str or PIL.Image.Image): image (as a filename or an Image object) from which
138
+ we should read EXIF data.
139
+
140
+ Returns:
141
+ dict: a dictionary mapping EXIF tag names to their values
142
+ """
143
+
144
+ if options is None:
145
+ options = ReadExifOptions()
146
+
147
+ image_name = '[image]'
148
+ if isinstance(im,str):
149
+ image_name = im
150
+ im = Image.open(im)
151
+
152
+ exif_tags = {}
153
+ try:
154
+ exif_info = im.getexif()
155
+ except Exception:
156
+ exif_info = None
157
+
158
+ if exif_info is None:
159
+ return exif_tags
160
+
161
+ for k, v in exif_info.items():
162
+ assert isinstance(k,str) or isinstance(k,int), \
163
+ 'Invalid EXIF key {}'.format(str(k))
164
+ if k in ExifTags.TAGS:
165
+ exif_tags[ExifTags.TAGS[k]] = str(v)
166
+ else:
167
+ # print('Warning: unrecognized EXIF tag: {}'.format(k))
168
+ exif_tags[k] = str(v)
169
+
170
+ exif_ifd_tags = _get_exif_ifd(exif_info)
171
+
172
+ for k in exif_ifd_tags.keys():
173
+ v = exif_ifd_tags[k]
174
+ if k in exif_tags:
175
+ if options.verbose:
176
+ print('Warning: redundant EXIF values for {} in {}:\n{}\n{}'.format(
177
+ k,image_name,exif_tags[k],v))
178
+ else:
179
+ exif_tags[k] = v
180
+
181
+ exif_tag_names = list(exif_tags.keys())
182
+
183
+ # Type conversion and cleanup
184
+ #
185
+ # Most quirky types will get serialized to string when we write to .json.
186
+ for k in exif_tag_names:
187
+
188
+ if isinstance(exif_tags[k],bytes):
189
+
190
+ if options.byte_handling == 'delete':
191
+ del exif_tags[k]
192
+ elif options.byte_handling == 'raw':
193
+ pass
194
+ else:
195
+ assert options.byte_handling == 'convert_to_string'
196
+ exif_tags[k] = str(exif_tags[k])
197
+
198
+ elif isinstance(exif_tags[k],str):
199
+
200
+ exif_tags[k] = exif_tags[k].strip()
201
+
202
+ return exif_tags
203
+
204
+ # ...read_pil_exif()
205
+
206
+
207
+ def format_datetime_as_exif_datetime_string(dt):
208
+ """
209
+ Returns a Python datetime object rendered using the standard EXIF datetime
210
+ string format ('%Y:%m:%d %H:%M:%S')
211
+ """
212
+
213
+ return datetime.strftime(dt, '%Y:%m:%d %H:%M:%S')
214
+
215
+
216
+ def parse_exif_datetime_string(s,verbose=False):
217
+ """"
218
+ Exif datetimes are strings, but in a standard format:
219
+
220
+ %Y:%m:%d %H:%M:%S
221
+
222
+ Parses one of those strings into a Python datetime object.
223
+
224
+ Args:
225
+ s (str): datetime string to parse, should be in standard EXIF datetime format
226
+ verbose (bool, optional): enable additional debug output
227
+
228
+ Returns:
229
+ datetime: the datetime object created from [s]
230
+ """
231
+
232
+ dt = None
233
+ try:
234
+ dt = datetime.strptime(s, '%Y:%m:%d %H:%M:%S')
235
+ except Exception:
236
+ if verbose:
237
+ print('Warning: could not parse datetime {}'.format(str(s)))
238
+ return dt
239
+
240
+
241
+ def _filter_tags(tags,options):
242
+ """
243
+ Internal function used to include/exclude specific tags from the exif_tags
244
+ dict.
245
+ """
246
+
247
+ if options is None:
248
+ return tags
249
+ if options.tags_to_include is None and options.tags_to_exclude is None:
250
+ return tags
251
+ if options.tags_to_include is not None:
252
+ assert options.tags_to_exclude is None, "tags_to_include and tags_to_exclude are incompatible"
253
+ tags_to_return = {}
254
+ for tag_name in tags.keys():
255
+ if tag_name in options.tags_to_include:
256
+ tags_to_return[tag_name] = tags[tag_name]
257
+ return tags_to_return
258
+ if options.tags_to_exclude is not None:
259
+ assert options.tags_to_include is None, "tags_to_include and tags_to_exclude are incompatible"
260
+ tags_to_return = {}
261
+ for tag_name in tags.keys():
262
+ if tag_name not in options.tags_to_exclude:
263
+ tags_to_return[tag_name] = tags[tag_name]
264
+ return tags_to_return
265
+
266
+
267
+ def read_exif_tags_for_image(file_path,options=None):
268
+ """
269
+ Get relevant fields from EXIF data for an image
270
+
271
+ Returns:
272
+ dict: a dict with fields 'status' (str) and 'tags'. The exact format of 'tags' depends on
273
+ options (ReadExifOptions, optional): parameters controlling metadata extraction
274
+ options.processing_library:
275
+
276
+ - For exiftool, 'tags' is a list of lists, where each element is (type/tag/value)
277
+ - For PIL, 'tags' is a dict (str:str)
278
+ """
279
+
280
+ if options is None:
281
+ options = ReadExifOptions()
282
+
283
+ result = {'status':'unknown','tags':[]}
284
+
285
+ if options.processing_library == 'pil':
286
+
287
+ try:
288
+ exif_tags = read_pil_exif(file_path,options)
289
+
290
+ except Exception as e:
291
+ if options.verbose:
292
+ print('Read failure for image {}: {}'.format(
293
+ file_path,str(e)))
294
+ result['status'] = 'read_failure'
295
+ result['error'] = str(e)
296
+
297
+ if result['status'] == 'unknown':
298
+ if exif_tags is None:
299
+ result['status'] = 'empty_read'
300
+ else:
301
+ result['status'] = 'success'
302
+ result['tags'] = _filter_tags(exif_tags,options)
303
+
304
+ return result
305
+
306
+ elif options.processing_library == 'exiftool':
307
+
308
+ # -G means "Print group name for each tag", e.g. print:
309
+ #
310
+ # [File] Bits Per Sample : 8
311
+ #
312
+ # ...instead of:
313
+ #
314
+ # Bits Per Sample : 8
315
+ proc = subprocess.Popen([options.exiftool_command_name, '-G', file_path],
316
+ stdout=subprocess.PIPE, encoding='utf8')
317
+
318
+ exif_lines = proc.stdout.readlines()
319
+ exif_lines = [s.strip() for s in exif_lines]
320
+ if ( (exif_lines is None) or (len(exif_lines) == 0) or not \
321
+ any([s.lower().startswith('[exif]') for s in exif_lines])):
322
+ result['status'] = 'failure'
323
+ return result
324
+
325
+ # A list of three-element lists (type/tag/value)
326
+ exif_tags = []
327
+
328
+ # line_raw = exif_lines[0]
329
+ for line_raw in exif_lines:
330
+
331
+ # A typical line:
332
+ #
333
+ # [ExifTool] ExifTool Version Number : 12.13
334
+
335
+ line = line_raw.strip()
336
+
337
+ # Split on the first occurrence of ":"
338
+ tokens = line.split(':',1)
339
+ assert(len(tokens) == 2), 'EXIF tokenization failure ({} tokens, expected 2)'.format(
340
+ len(tokens))
341
+
342
+ field_value = tokens[1].strip()
343
+
344
+ field_name_type = tokens[0].strip()
345
+ field_name_type_tokens = field_name_type.split(None,1)
346
+ assert len(field_name_type_tokens) == 2, 'EXIF tokenization failure'
347
+
348
+ field_type = field_name_type_tokens[0].strip()
349
+ assert field_type.startswith('[') and field_type.endswith(']'), \
350
+ 'Invalid EXIF field {}'.format(field_type)
351
+ field_type = field_type[1:-1]
352
+
353
+ if field_type in options.tag_types_to_ignore:
354
+ if options.verbose:
355
+ print('Ignoring tag with type {}'.format(field_type))
356
+ continue
357
+
358
+ field_name = field_name_type_tokens[1].strip()
359
+ if options.tags_to_exclude is not None and field_name in options.tags_to_exclude:
360
+ continue
361
+ if options.tags_to_include is not None and field_name not in options.tags_to_include:
362
+ continue
363
+ tag = [field_type,field_name,field_value]
364
+
365
+ exif_tags.append(tag)
366
+
367
+ # ...for each output line
368
+
369
+ result['status'] = 'success'
370
+ result['tags'] = exif_tags
371
+ return result
372
+
373
+ else:
374
+
375
+ raise ValueError('Unknown processing library {}'.format(
376
+ options.processing_library))
377
+
378
+ # ...which processing library are we using?
379
+
380
+ # ...read_exif_tags_for_image()
381
+
382
+
383
+ def _populate_exif_data(im, image_base, options=None):
384
+ """
385
+ Populate EXIF data into the 'exif_tags' field in the image object [im].
386
+
387
+ im['file_name'] should be prepopulated, relative to image_base.
388
+
389
+ Returns a modified version of [im], also modifies [im] in place.
390
+ """
391
+
392
+ if options is None:
393
+ options = ReadExifOptions()
394
+
395
+ fn = im['file_name']
396
+ if options.verbose:
397
+ print('Processing {}'.format(fn))
398
+
399
+ try:
400
+
401
+ file_path = os.path.join(image_base,fn)
402
+ assert os.path.isfile(file_path), 'Could not find file {}'.format(file_path)
403
+ result = read_exif_tags_for_image(file_path,options)
404
+ if result['status'] == 'success':
405
+ exif_tags = result['tags']
406
+ im['exif_tags'] = exif_tags
407
+ else:
408
+ im['exif_tags'] = None
409
+ im['status'] = result['status']
410
+ if 'error' in result:
411
+ im['error'] = result['error']
412
+ if options.verbose:
413
+ print('Error reading EXIF data for {}'.format(file_path))
414
+
415
+ except Exception as e:
416
+
417
+ s = 'Error on {}: {}'.format(fn,str(e))
418
+ print(s)
419
+ im['error'] = s
420
+ im['status'] = 'read failure'
421
+ im['exif_tags'] = None
422
+
423
+ return im
424
+
425
+ # ..._populate_exif_data()
426
+
427
+
428
+ def _create_image_objects(image_files,recursive=True):
429
+ """
430
+ Create empty image objects for every image in [image_files], which can be a
431
+ list of relative paths (which will get stored without processing, so the base
432
+ path doesn't matter here), or a folder name.
433
+
434
+ Returns a list of dicts with field 'file_name' (a relative path).
435
+
436
+ "recursive" is ignored if "image_files" is a list.
437
+ """
438
+
439
+ # Enumerate *relative* paths
440
+ if isinstance(image_files,str):
441
+ print('Enumerating image files in {}'.format(image_files))
442
+ assert os.path.isdir(image_files), 'Invalid image folder {}'.format(image_files)
443
+ image_files = find_images(image_files,
444
+ recursive=recursive,
445
+ return_relative_paths=True,
446
+ convert_slashes=True)
447
+
448
+ images = []
449
+ for fn in image_files:
450
+ im = {}
451
+ im['file_name'] = fn
452
+ images.append(im)
453
+
454
+ if debug_max_images is not None:
455
+ print('Trimming input list to {} images'.format(debug_max_images))
456
+ images = images[0:debug_max_images]
457
+
458
+ return images
459
+
460
+
461
+ def _populate_exif_for_images(image_base,images,options=None):
462
+ """
463
+ Main worker loop: read EXIF data for each image object in [images] and
464
+ populate the image objects in place.
465
+
466
+ 'images' should be a list of dicts with the field 'file_name' containing
467
+ a relative path (relative to 'image_base').
468
+ """
469
+
470
+ if options is None:
471
+ options = ReadExifOptions()
472
+
473
+ if options.n_workers == 1:
474
+
475
+ results = []
476
+ for im in tqdm(images):
477
+ results.append(_populate_exif_data(im,image_base,options))
478
+
479
+ else:
480
+
481
+ from functools import partial
482
+ if options.use_threads:
483
+ print('Starting parallel thread pool with {} workers'.format(options.n_workers))
484
+ pool = ThreadPool(options.n_workers)
485
+ else:
486
+ print('Starting parallel process pool with {} workers'.format(options.n_workers))
487
+ pool = Pool(options.n_workers)
488
+
489
+ results = list(tqdm(pool.imap(partial(_populate_exif_data,image_base=image_base,
490
+ options=options),images),total=len(images)))
491
+
492
+ return results
493
+
494
+
495
+ def _write_exif_results(results,output_file):
496
+ """
497
+ Write EXIF information to [output_file].
498
+
499
+ 'results' is a list of dicts with fields 'exif_tags' and 'file_name'.
500
+
501
+ Writes to .csv or .json depending on the extension of 'output_file'.
502
+ """
503
+
504
+ if output_file.endswith('.json'):
505
+
506
+ with open(output_file,'w') as f:
507
+ json.dump(results,f,indent=1,default=str)
508
+
509
+ elif output_file.endswith('.csv'):
510
+
511
+ # Find all EXIF tags that exist in any image
512
+ all_keys = set()
513
+ for im in results:
514
+
515
+ keys_this_image = set()
516
+ exif_tags = im['exif_tags']
517
+ file_name = im['file_name']
518
+ for tag in exif_tags:
519
+ tag_name = tag[1]
520
+ assert tag_name not in keys_this_image, \
521
+ 'Error: tag {} appears twice in image {}'.format(
522
+ tag_name,file_name)
523
+ all_keys.add(tag_name)
524
+
525
+ # ...for each tag in this image
526
+
527
+ # ...for each image
528
+
529
+ all_keys = sorted(list(all_keys))
530
+
531
+ header = ['File Name']
532
+ header.extend(all_keys)
533
+
534
+ import csv
535
+ with open(output_file,'w') as csvfile:
536
+
537
+ writer = csv.writer(csvfile)
538
+
539
+ # Write header
540
+ writer.writerow(header)
541
+
542
+ for im in results:
543
+
544
+ row = [im['file_name']]
545
+ kvp_this_image = {tag[1]:tag[2] for tag in im['exif_tags']}
546
+
547
+ for i_key,key in enumerate(all_keys):
548
+ value = ''
549
+ if key in kvp_this_image:
550
+ value = kvp_this_image[key]
551
+ row.append(value)
552
+ # ...for each key that *might* be present in this image
553
+
554
+ assert len(row) == len(header)
555
+
556
+ writer.writerow(row)
557
+
558
+ # ...for each image
559
+
560
+ # ...with open()
561
+
562
+ else:
563
+
564
+ raise ValueError('Could not determine output type from file {}'.format(
565
+ output_file))
566
+
567
+ # ...if we're writing to .json/.csv
568
+
569
+ print('Wrote results to {}'.format(output_file))
570
+
571
+ # ..._write_exif_results(...)
572
+
573
+
574
def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=None,recursive=True):
    """
    Read EXIF data for a folder of images.

    Args:
        input_folder (str): folder to process; if this is None, [filenames] should be a list of absolute
            paths
        output_file (str, optional): .json or .csv file to which we should write results; if this is
            None, results are returned but not written to disk
        options (ReadExifOptions, optional): parameters controlling metadata extraction
        filenames (list, optional): allowlist of relative filenames (if [input_folder] is not None) or
            a list of absolute filenames (if [input_folder] is None)
        recursive (bool, optional): whether to recurse into [input_folder], not relevant if [input_folder]
            is None.

    Returns:
        list: list of dicts, each of which contains EXIF information for one image. Fields include at least:
            * 'file_name': the relative path to the image
            * 'exif_tags': a dict of EXIF tags whose exact format depends on [options.processing_library].
    """

    if options is None:
        options = ReadExifOptions()

    # Validate options: the include and exclude lists are mutually exclusive
    if options.tags_to_include is not None:
        assert options.tags_to_exclude is None, "tags_to_include and tags_to_exclude are incompatible"
    if options.tags_to_exclude is not None:
        assert options.tags_to_include is None, "tags_to_include and tags_to_exclude are incompatible"

    if input_folder is None:
        input_folder = ''
    if len(input_folder) > 0:
        assert os.path.isdir(input_folder), \
            '{} is not a valid folder'.format(input_folder)

    assert (len(input_folder) > 0) or (filenames is not None), \
        'Must specify either a folder or a list of files'

    if output_file is not None:

        assert output_file.lower().endswith('.json') or output_file.lower().endswith('.csv'), \
            'I only know how to write results to .json or .csv'

        # Verify up front that the output file is writable; append mode
        # creates the file if necessary but doesn't truncate an existing one
        try:
            with open(output_file, 'a') as f:
                if not f.writable():
                    raise IOError('File not writable')
        except Exception:
            print('Could not write to file {}'.format(output_file))
            raise

    if options.processing_library == 'exif':
        assert is_executable(options.exiftool_command_name), 'exiftool not available'

    if filenames is None:
        images = _create_image_objects(input_folder,recursive=recursive)
    else:
        assert isinstance(filenames,list)
        images = _create_image_objects(filenames)

    results = _populate_exif_for_images(input_folder,images,options)

    if output_file is not None:
        try:
            _write_exif_results(results,output_file)
        except Exception as e:
            # Serialization failures are optionally tolerated, since results
            # are still returned to the caller
            if not options.allow_write_error:
                raise
            else:
                print('Warning: error serializing EXIF data: {}'.format(str(e)))

    return results

# ...read_exif_from_folder(...)
650
+
651
+
652
def exif_results_to_cct(exif_results,cct_output_file=None,options=None):
    """
    Given the EXIF results for a folder of images read via read_exif_from_folder,
    create a COCO Camera Traps .json file that has no annotations, but
    attaches image filenames to locations and datetimes.

    Args:
        exif_results (str or list): the filename (or loaded list) containing the results
            from read_exif_from_folder
        cct_output_file (str, optional): the filename to which we should write
            COCO-Camera-Traps-formatted data
        options (ExifResultsToCCTOptions, optional): parameters controlling the
            conversion (location mapping, datetime tag/validity)

    Returns:
        dict: a COCO Camera Traps dict (with no annotations).
    """

    if options is None:
        options = ExifResultsToCCTOptions()

    if isinstance(exif_results,str):
        print('Reading EXIF results from {}'.format(exif_results))
        with open(exif_results,'r') as f:
            exif_results = json.load(f)
    else:
        assert isinstance(exif_results,list)

    now = datetime.now()

    image_info = []

    images_without_datetime = []
    images_with_invalid_datetime = []

    # exif_result = exif_results[0]
    for exif_result in tqdm(exif_results):

        im = {}

        # By default we assume that each leaf-node folder is a location
        if options.filename_to_location_function is None:
            im['location'] = os.path.dirname(exif_result['file_name'])
        else:
            im['location'] = options.filename_to_location_function(exif_result['file_name'])

        im['file_name'] = exif_result['file_name']
        im['id'] = im['file_name']

        if ('exif_tags' not in exif_result) or (exif_result['exif_tags'] is None) or \
            (options.exif_datetime_tag not in exif_result['exif_tags']):
            exif_dt = None
        else:
            exif_dt = exif_result['exif_tags'][options.exif_datetime_tag]
            exif_dt = parse_exif_datetime_string(exif_dt)
        if exif_dt is None:
            im['datetime'] = None
            images_without_datetime.append(im['file_name'])
        else:
            dt = exif_dt

            # An image from the future (or within the last 24 hours) is invalid
            if (now - dt).total_seconds() <= 1*24*60*60:
                print('Warning: datetime for {} is {}'.format(
                    im['file_name'],dt))
                im['datetime'] = None
                images_with_invalid_datetime.append(im['file_name'])

            # An image from before the dawn of time is also invalid
            elif dt.year < options.min_valid_timestamp_year:
                print('Warning: datetime for {} is {}'.format(
                    im['file_name'],dt))
                im['datetime'] = None
                images_with_invalid_datetime.append(im['file_name'])

            else:
                im['datetime'] = dt

        image_info.append(im)

    # ...for each exif image result

    print('Parsed EXIF datetime information, unable to parse EXIF date from {} of {} images'.format(
        len(images_without_datetime),len(exif_results)))

    d = {}
    d['info'] = {}
    d['images'] = image_info
    d['annotations'] = []
    d['categories'] = []

    if cct_output_file is not None:
        with open(cct_output_file,'w') as f:
            # Previously this called json.dump(d,indent=1), omitting the file
            # object.  default=str is required because im['datetime'] values
            # are datetime objects.
            json.dump(d,f,indent=1,default=str)

    return d

# ...exif_results_to_cct(...)
748
+
749
+
750
#%% Interactive driver

if False:

    #%%

    # NOTE: these are interactive scratchpad "cells" for IDE use; the
    # enclosing "if False:" guarantees they never run on import or execution.

    input_folder = r'C:\temp\md-name-testing'
    output_file = None # r'C:\temp\md-name-testing\exif.json'
    options = ReadExifOptions()
    options.verbose = False
    options.n_workers = 10
    options.use_threads = False
    options.processing_library = 'pil'
    # options.processing_library = 'exiftool'
    options.tags_to_include = ['DateTime','Model','Make','ExifImageWidth','ExifImageHeight','DateTime','DateTimeOriginal','Orientation']
    # options.tags_to_exclude = ['MakerNote']

    results = read_exif_from_folder(input_folder,output_file,options)

    #%%

    # NOTE(review): this cell assumes output_file was set to a real path in
    # the previous cell; with output_file = None it will fail.
    with open(output_file,'r') as f:
        d = json.load(f)
773
+
774
+
775
+ #%% Command-line driver
776
+
777
+ import argparse
778
+ import sys
779
+
780
def main():
    """
    Command-line driver for read_exif_from_folder(): parse arguments into a
    ReadExifOptions object and run EXIF extraction on a folder.
    """

    options = ReadExifOptions()

    parser = argparse.ArgumentParser(description=('Read EXIF information from all images in' + \
                                     ' a folder, and write the results to .csv or .json'))

    parser.add_argument('input_folder', type=str,
                        help='Folder of images from which we should read EXIF information')
    # read_exif_from_folder accepts .json or .csv output; the help text
    # previously mentioned only .json
    parser.add_argument('output_file', type=str,
                        help='Output file (.json or .csv) to which we should write EXIF information')
    parser.add_argument('--n_workers', type=int, default=1,
                        help='Number of concurrent workers to use (defaults to 1)')
    parser.add_argument('--use_threads', action='store_true',
                        help='Use threads (instead of processes) for multitasking')
    parser.add_argument('--processing_library', type=str, default=options.processing_library,
                        help='Processing library (exif or pil)')

    # With no arguments, print help rather than an error
    if len(sys.argv[1:]) == 0:
        parser.print_help()
        parser.exit()

    args = parser.parse_args()

    # Copy parsed arguments onto the options object
    args_to_object(args, options)
    options.processing_library = options.processing_library.lower()

    read_exif_from_folder(args.input_folder,args.output_file,options)

if __name__ == '__main__':
    main()