megadetector 5.0.11__py3-none-any.whl → 5.0.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (201) hide show
  1. megadetector/api/__init__.py +0 -0
  2. megadetector/api/batch_processing/__init__.py +0 -0
  3. megadetector/api/batch_processing/api_core/__init__.py +0 -0
  4. megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. megadetector/api/batch_processing/api_core/batch_service/score.py +439 -0
  6. megadetector/api/batch_processing/api_core/server.py +294 -0
  7. megadetector/api/batch_processing/api_core/server_api_config.py +98 -0
  8. megadetector/api/batch_processing/api_core/server_app_config.py +55 -0
  9. megadetector/api/batch_processing/api_core/server_batch_job_manager.py +220 -0
  10. megadetector/api/batch_processing/api_core/server_job_status_table.py +152 -0
  11. megadetector/api/batch_processing/api_core/server_orchestration.py +360 -0
  12. megadetector/api/batch_processing/api_core/server_utils.py +92 -0
  13. megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
  14. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +46 -0
  15. megadetector/api/batch_processing/api_support/__init__.py +0 -0
  16. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +152 -0
  17. megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
  18. megadetector/api/batch_processing/integration/digiKam/setup.py +6 -0
  19. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +465 -0
  20. megadetector/api/batch_processing/integration/eMammal/test_scripts/config_template.py +5 -0
  21. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +126 -0
  22. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +55 -0
  23. megadetector/api/synchronous/__init__.py +0 -0
  24. megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  25. megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +152 -0
  26. megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -0
  27. megadetector/api/synchronous/api_core/animal_detection_api/config.py +35 -0
  28. megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
  29. megadetector/api/synchronous/api_core/tests/load_test.py +110 -0
  30. megadetector/classification/__init__.py +0 -0
  31. megadetector/classification/aggregate_classifier_probs.py +108 -0
  32. megadetector/classification/analyze_failed_images.py +227 -0
  33. megadetector/classification/cache_batchapi_outputs.py +198 -0
  34. megadetector/classification/create_classification_dataset.py +627 -0
  35. megadetector/classification/crop_detections.py +516 -0
  36. megadetector/classification/csv_to_json.py +226 -0
  37. megadetector/classification/detect_and_crop.py +855 -0
  38. megadetector/classification/efficientnet/__init__.py +9 -0
  39. megadetector/classification/efficientnet/model.py +415 -0
  40. megadetector/classification/efficientnet/utils.py +610 -0
  41. megadetector/classification/evaluate_model.py +520 -0
  42. megadetector/classification/identify_mislabeled_candidates.py +152 -0
  43. megadetector/classification/json_to_azcopy_list.py +63 -0
  44. megadetector/classification/json_validator.py +699 -0
  45. megadetector/classification/map_classification_categories.py +276 -0
  46. megadetector/classification/merge_classification_detection_output.py +506 -0
  47. megadetector/classification/prepare_classification_script.py +194 -0
  48. megadetector/classification/prepare_classification_script_mc.py +228 -0
  49. megadetector/classification/run_classifier.py +287 -0
  50. megadetector/classification/save_mislabeled.py +110 -0
  51. megadetector/classification/train_classifier.py +827 -0
  52. megadetector/classification/train_classifier_tf.py +725 -0
  53. megadetector/classification/train_utils.py +323 -0
  54. megadetector/data_management/__init__.py +0 -0
  55. megadetector/data_management/annotations/__init__.py +0 -0
  56. megadetector/data_management/annotations/annotation_constants.py +34 -0
  57. megadetector/data_management/camtrap_dp_to_coco.py +239 -0
  58. megadetector/data_management/cct_json_utils.py +395 -0
  59. megadetector/data_management/cct_to_md.py +176 -0
  60. megadetector/data_management/cct_to_wi.py +289 -0
  61. megadetector/data_management/coco_to_labelme.py +272 -0
  62. megadetector/data_management/coco_to_yolo.py +662 -0
  63. megadetector/data_management/databases/__init__.py +0 -0
  64. megadetector/data_management/databases/add_width_and_height_to_db.py +33 -0
  65. megadetector/data_management/databases/combine_coco_camera_traps_files.py +206 -0
  66. megadetector/data_management/databases/integrity_check_json_db.py +477 -0
  67. megadetector/data_management/databases/subset_json_db.py +115 -0
  68. megadetector/data_management/generate_crops_from_cct.py +149 -0
  69. megadetector/data_management/get_image_sizes.py +189 -0
  70. megadetector/data_management/importers/add_nacti_sizes.py +52 -0
  71. megadetector/data_management/importers/add_timestamps_to_icct.py +79 -0
  72. megadetector/data_management/importers/animl_results_to_md_results.py +158 -0
  73. megadetector/data_management/importers/auckland_doc_test_to_json.py +373 -0
  74. megadetector/data_management/importers/auckland_doc_to_json.py +201 -0
  75. megadetector/data_management/importers/awc_to_json.py +191 -0
  76. megadetector/data_management/importers/bellevue_to_json.py +273 -0
  77. megadetector/data_management/importers/cacophony-thermal-importer.py +796 -0
  78. megadetector/data_management/importers/carrizo_shrubfree_2018.py +269 -0
  79. megadetector/data_management/importers/carrizo_trail_cam_2017.py +289 -0
  80. megadetector/data_management/importers/cct_field_adjustments.py +58 -0
  81. megadetector/data_management/importers/channel_islands_to_cct.py +913 -0
  82. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +180 -0
  83. megadetector/data_management/importers/eMammal/eMammal_helpers.py +249 -0
  84. megadetector/data_management/importers/eMammal/make_eMammal_json.py +223 -0
  85. megadetector/data_management/importers/ena24_to_json.py +276 -0
  86. megadetector/data_management/importers/filenames_to_json.py +386 -0
  87. megadetector/data_management/importers/helena_to_cct.py +283 -0
  88. megadetector/data_management/importers/idaho-camera-traps.py +1407 -0
  89. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +294 -0
  90. megadetector/data_management/importers/jb_csv_to_json.py +150 -0
  91. megadetector/data_management/importers/mcgill_to_json.py +250 -0
  92. megadetector/data_management/importers/missouri_to_json.py +490 -0
  93. megadetector/data_management/importers/nacti_fieldname_adjustments.py +79 -0
  94. megadetector/data_management/importers/noaa_seals_2019.py +181 -0
  95. megadetector/data_management/importers/pc_to_json.py +365 -0
  96. megadetector/data_management/importers/plot_wni_giraffes.py +123 -0
  97. megadetector/data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -0
  98. megadetector/data_management/importers/prepare_zsl_imerit.py +131 -0
  99. megadetector/data_management/importers/rspb_to_json.py +356 -0
  100. megadetector/data_management/importers/save_the_elephants_survey_A.py +320 -0
  101. megadetector/data_management/importers/save_the_elephants_survey_B.py +329 -0
  102. megadetector/data_management/importers/snapshot_safari_importer.py +758 -0
  103. megadetector/data_management/importers/snapshot_safari_importer_reprise.py +665 -0
  104. megadetector/data_management/importers/snapshot_serengeti_lila.py +1067 -0
  105. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +150 -0
  106. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +153 -0
  107. megadetector/data_management/importers/sulross_get_exif.py +65 -0
  108. megadetector/data_management/importers/timelapse_csv_set_to_json.py +490 -0
  109. megadetector/data_management/importers/ubc_to_json.py +399 -0
  110. megadetector/data_management/importers/umn_to_json.py +507 -0
  111. megadetector/data_management/importers/wellington_to_json.py +263 -0
  112. megadetector/data_management/importers/wi_to_json.py +442 -0
  113. megadetector/data_management/importers/zamba_results_to_md_results.py +181 -0
  114. megadetector/data_management/labelme_to_coco.py +547 -0
  115. megadetector/data_management/labelme_to_yolo.py +272 -0
  116. megadetector/data_management/lila/__init__.py +0 -0
  117. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +97 -0
  118. megadetector/data_management/lila/add_locations_to_nacti.py +147 -0
  119. megadetector/data_management/lila/create_lila_blank_set.py +558 -0
  120. megadetector/data_management/lila/create_lila_test_set.py +152 -0
  121. megadetector/data_management/lila/create_links_to_md_results_files.py +106 -0
  122. megadetector/data_management/lila/download_lila_subset.py +178 -0
  123. megadetector/data_management/lila/generate_lila_per_image_labels.py +516 -0
  124. megadetector/data_management/lila/get_lila_annotation_counts.py +170 -0
  125. megadetector/data_management/lila/get_lila_image_counts.py +112 -0
  126. megadetector/data_management/lila/lila_common.py +300 -0
  127. megadetector/data_management/lila/test_lila_metadata_urls.py +132 -0
  128. megadetector/data_management/ocr_tools.py +874 -0
  129. megadetector/data_management/read_exif.py +681 -0
  130. megadetector/data_management/remap_coco_categories.py +84 -0
  131. megadetector/data_management/remove_exif.py +66 -0
  132. megadetector/data_management/resize_coco_dataset.py +189 -0
  133. megadetector/data_management/wi_download_csv_to_coco.py +246 -0
  134. megadetector/data_management/yolo_output_to_md_output.py +441 -0
  135. megadetector/data_management/yolo_to_coco.py +676 -0
  136. megadetector/detection/__init__.py +0 -0
  137. megadetector/detection/detector_training/__init__.py +0 -0
  138. megadetector/detection/detector_training/model_main_tf2.py +114 -0
  139. megadetector/detection/process_video.py +702 -0
  140. megadetector/detection/pytorch_detector.py +341 -0
  141. megadetector/detection/run_detector.py +779 -0
  142. megadetector/detection/run_detector_batch.py +1219 -0
  143. megadetector/detection/run_inference_with_yolov5_val.py +917 -0
  144. megadetector/detection/run_tiled_inference.py +934 -0
  145. megadetector/detection/tf_detector.py +189 -0
  146. megadetector/detection/video_utils.py +606 -0
  147. megadetector/postprocessing/__init__.py +0 -0
  148. megadetector/postprocessing/add_max_conf.py +64 -0
  149. megadetector/postprocessing/categorize_detections_by_size.py +163 -0
  150. megadetector/postprocessing/combine_api_outputs.py +249 -0
  151. megadetector/postprocessing/compare_batch_results.py +958 -0
  152. megadetector/postprocessing/convert_output_format.py +396 -0
  153. megadetector/postprocessing/load_api_results.py +195 -0
  154. megadetector/postprocessing/md_to_coco.py +310 -0
  155. megadetector/postprocessing/md_to_labelme.py +330 -0
  156. megadetector/postprocessing/merge_detections.py +401 -0
  157. megadetector/postprocessing/postprocess_batch_results.py +1902 -0
  158. megadetector/postprocessing/remap_detection_categories.py +170 -0
  159. megadetector/postprocessing/render_detection_confusion_matrix.py +660 -0
  160. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +211 -0
  161. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +83 -0
  162. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +1631 -0
  163. megadetector/postprocessing/separate_detections_into_folders.py +730 -0
  164. megadetector/postprocessing/subset_json_detector_output.py +696 -0
  165. megadetector/postprocessing/top_folders_to_bottom.py +223 -0
  166. megadetector/taxonomy_mapping/__init__.py +0 -0
  167. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +491 -0
  168. megadetector/taxonomy_mapping/map_new_lila_datasets.py +150 -0
  169. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -0
  170. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +590 -0
  171. megadetector/taxonomy_mapping/retrieve_sample_image.py +71 -0
  172. megadetector/taxonomy_mapping/simple_image_download.py +219 -0
  173. megadetector/taxonomy_mapping/species_lookup.py +834 -0
  174. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +159 -0
  175. megadetector/taxonomy_mapping/taxonomy_graph.py +346 -0
  176. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +83 -0
  177. megadetector/utils/__init__.py +0 -0
  178. megadetector/utils/azure_utils.py +178 -0
  179. megadetector/utils/ct_utils.py +612 -0
  180. megadetector/utils/directory_listing.py +246 -0
  181. megadetector/utils/md_tests.py +968 -0
  182. megadetector/utils/path_utils.py +1044 -0
  183. megadetector/utils/process_utils.py +157 -0
  184. megadetector/utils/sas_blob_utils.py +509 -0
  185. megadetector/utils/split_locations_into_train_val.py +228 -0
  186. megadetector/utils/string_utils.py +92 -0
  187. megadetector/utils/url_utils.py +323 -0
  188. megadetector/utils/write_html_image_list.py +225 -0
  189. megadetector/visualization/__init__.py +0 -0
  190. megadetector/visualization/plot_utils.py +293 -0
  191. megadetector/visualization/render_images_with_thumbnails.py +275 -0
  192. megadetector/visualization/visualization_utils.py +1536 -0
  193. megadetector/visualization/visualize_db.py +550 -0
  194. megadetector/visualization/visualize_detector_output.py +405 -0
  195. {megadetector-5.0.11.dist-info → megadetector-5.0.12.dist-info}/METADATA +1 -1
  196. megadetector-5.0.12.dist-info/RECORD +199 -0
  197. megadetector-5.0.12.dist-info/top_level.txt +1 -0
  198. megadetector-5.0.11.dist-info/RECORD +0 -5
  199. megadetector-5.0.11.dist-info/top_level.txt +0 -1
  200. {megadetector-5.0.11.dist-info → megadetector-5.0.12.dist-info}/LICENSE +0 -0
  201. {megadetector-5.0.11.dist-info → megadetector-5.0.12.dist-info}/WHEEL +0 -0
@@ -0,0 +1,681 @@
1
+ """
2
+
3
+ read_exif.py
4
+
5
+ Given a folder of images, reads relevant metadata (EXIF/IPTC/XMP) fields from all images,
6
+ and writes them to a .json or .csv file.
7
+
8
+ This module can use either PIL (which can only reliably read EXIF data) or exiftool (which
9
+ can read everything). The latter approach expects that exiftool is available on the system
10
+ path. No attempt is made to be consistent in format across the two approaches.
11
+
12
+ """
13
+
14
+ #%% Imports and constants
15
+
16
+ import os
17
+ import subprocess
18
+ import json
19
+ from datetime import datetime
20
+
21
+ from multiprocessing.pool import ThreadPool as ThreadPool
22
+ from multiprocessing.pool import Pool as Pool
23
+
24
+ from tqdm import tqdm
25
+ from PIL import Image, ExifTags
26
+
27
+ from megadetector.utils.path_utils import find_images, is_executable
28
+ from megadetector.utils.ct_utils import args_to_object
29
+
30
+ debug_max_images = None
31
+
32
+
33
+ #%% Options
34
+
35
class ReadExifOptions:
    """
    Parameters controlling metadata extraction in read_exif_tags_for_image()
    and read_exif_from_folder().
    """

    #: Enable additional debug console output
    verbose = False

    #: If this is True and an output file is specified for read_exif_from_folder,
    #: and we encounter a serialization issue, we'll return the results but won't
    #: error.
    allow_write_error = False

    #: Number of concurrent workers, set to <= 1 to disable parallelization
    n_workers = 1

    #: Should we use threads (vs. processes) for parallelization?
    #:
    #: Not relevant if n_workers is <= 1.
    use_threads = True

    #: "File" and "ExifTool" are tag types used by ExifTool to report data that
    #: doesn't come from EXIF, rather from the file (e.g. file size).  Tags of
    #: these types are skipped when parsing exiftool output.
    tag_types_to_ignore = set(['File','ExifTool'])

    #: Include/exclude specific tags (tags_to_include and tags_to_exclude are mutually incompatible)
    #:
    #: A useful set of tags one might want to limit queries for:
    #:
    #: options.tags_to_include = ['DateTime','Model','Make','ExifImageWidth','ExifImageHeight','DateTime',
    #:                            'DateTimeOriginal','Orientation']
    tags_to_include = None

    #: Include/exclude specific tags (tags_to_include and tags_to_exclude are mutually incompatible)
    tags_to_exclude = None

    #: The command line to invoke if using exiftool, can be an absolute path to exiftool.exe, or
    #: can be just "exiftool", in which case it should be on your system path.
    exiftool_command_name = 'exiftool'

    #: How should we handle byte-formatted EXIF tags?
    #:
    #: 'convert_to_string': convert to a Python string
    #: 'delete': don't include at all
    #: 'raw': include as a byte string
    byte_handling = 'convert_to_string' # 'convert_to_string','delete','raw'

    #: Should we use exiftool or PIL?  Valid values are 'exiftool' and 'pil'.
    processing_library = 'pil' # 'exiftool','pil'
84
+
85
+
86
+ #%% Functions
87
+
88
def _get_exif_ifd(exif):
    """
    Read tags from the EXIF sub-IFD by finding the EXIF offset tag and reading
    the corresponding IFD directly.

    See https://github.com/python-pillow/Pillow/issues/5863

    Args:
        exif (PIL.Image.Exif): EXIF data as returned by Image.getexif()

    Returns:
        dict: a dictionary mapping tag names (or numeric tag IDs, for
        unrecognized tags) to values
    """

    # 0x8769 is the standard tag ID for "ExifOffset" (the pointer to the EXIF
    # sub-IFD).  Look the ID up by name, but fall back to the standard value
    # rather than silently using an unrelated tag ID if the name is somehow
    # absent (the original loop leaked its loop variable, and would have used
    # whatever key happened to be iterated last).
    exif_offset_key = next(
        (tag_id for tag_id, tag_name in ExifTags.TAGS.items()
         if tag_name == 'ExifOffset'),
        0x8769)

    ifd_info = exif.get_ifd(exif_offset_key)

    return {ExifTags.TAGS.get(tag_id, tag_id): value
            for tag_id, value in ifd_info.items()}
104
+
105
+
106
def read_pil_exif(im,options=None):
    """
    Read all the EXIF data we know how to read from an image, using PIL. This is primarily
    an internal function; the main entry point for single-image EXIF information is
    read_exif_tags_for_image().

    Args:
        im (str or PIL.Image.Image): image (as a filename or an Image object) from which
            we should read EXIF data.
        options (ReadExifOptions, optional): parameters controlling metadata extraction;
            only [verbose] and [byte_handling] are relevant here.

    Returns:
        dict: a dictionary mapping EXIF tag names to their values
    """

    if options is None:
        options = ReadExifOptions()

    if isinstance(im,str):
        # Use a context manager so the file handle is closed even if EXIF
        # parsing fails; previously the handle opened here was never closed.
        with Image.open(im) as opened_image:
            return _read_pil_exif_from_image(opened_image,im,options)

    return _read_pil_exif_from_image(im,'[image]',options)


def _read_pil_exif_from_image(im,image_name,options):
    """
    Read EXIF data from an already-open PIL Image.  [image_name] is used only
    for console output.
    """

    exif_tags = {}
    try:
        exif_info = im.getexif()
    except Exception:
        exif_info = None

    if exif_info is None:
        return exif_tags

    # Tags stored in the primary image directory (IFD0)
    for k, v in exif_info.items():
        assert isinstance(k,str) or isinstance(k,int), \
            'Invalid EXIF key {}'.format(str(k))
        if k in ExifTags.TAGS:
            exif_tags[ExifTags.TAGS[k]] = str(v)
        else:
            # Unrecognized (e.g. maker-specific) tag; keep the numeric key
            exif_tags[k] = str(v)

    # Tags stored in the EXIF sub-IFD; IFD0 values win when a tag appears in
    # both places.
    exif_ifd_tags = _get_exif_ifd(exif_info)

    for k in exif_ifd_tags.keys():
        v = exif_ifd_tags[k]
        if k in exif_tags:
            if options.verbose:
                print('Warning: redundant EXIF values for {} in {}:\n{}\n{}'.format(
                    k,image_name,exif_tags[k],v))
        else:
            exif_tags[k] = v

    # Type conversion and cleanup
    #
    # Most quirky types will get serialized to string when we write to .json.
    #
    # Iterate over a snapshot of the keys, since we may delete keys below.
    for k in list(exif_tags.keys()):

        if isinstance(exif_tags[k],bytes):

            if options.byte_handling == 'delete':
                del exif_tags[k]
            elif options.byte_handling == 'raw':
                pass
            else:
                assert options.byte_handling == 'convert_to_string'
                exif_tags[k] = str(exif_tags[k])

        elif isinstance(exif_tags[k],str):

            exif_tags[k] = exif_tags[k].strip()

    return exif_tags

# ...read_pil_exif()
181
+
182
+
183
def format_datetime_as_exif_datetime_string(dt):
    """
    Render a Python datetime object using the standard EXIF datetime string
    format ('%Y:%m:%d %H:%M:%S').

    Args:
        dt (datetime): the datetime object to render

    Returns:
        str: [dt] formatted as an EXIF datetime string
    """

    return dt.strftime('%Y:%m:%d %H:%M:%S')
190
+
191
+
192
def parse_exif_datetime_string(s,verbose=False):
    """
    Parses an EXIF-formatted datetime string into a Python datetime object.

    EXIF datetimes are strings, but in a standard format:

    %Y:%m:%d %H:%M:%S

    Args:
        s (str): datetime string to parse, should be in standard EXIF datetime format
        verbose (bool, optional): enable additional debug output

    Returns:
        datetime: the datetime object created from [s], or None if parsing failed
    """

    # The original docstring opened with four quote characters, leaving a stray
    # quote in the rendered documentation; fixed here.
    dt = None
    try:
        dt = datetime.strptime(s, '%Y:%m:%d %H:%M:%S')
    except Exception:
        if verbose:
            print('Warning: could not parse datetime {}'.format(str(s)))
    return dt
215
+
216
+
217
+ def _filter_tags(tags,options):
218
+ """
219
+ Internal function used to include/exclude specific tags from the exif_tags
220
+ dict.
221
+ """
222
+
223
+ if options is None:
224
+ return tags
225
+ if options.tags_to_include is None and options.tags_to_exclude is None:
226
+ return tags
227
+ if options.tags_to_include is not None:
228
+ assert options.tags_to_exclude is None, "tags_to_include and tags_to_exclude are incompatible"
229
+ tags_to_return = {}
230
+ for tag_name in tags.keys():
231
+ if tag_name in options.tags_to_include:
232
+ tags_to_return[tag_name] = tags[tag_name]
233
+ return tags_to_return
234
+ if options.tags_to_exclude is not None:
235
+ assert options.tags_to_include is None, "tags_to_include and tags_to_exclude are incompatible"
236
+ tags_to_return = {}
237
+ for tag_name in tags.keys():
238
+ if tag_name not in options.tags_to_exclude:
239
+ tags_to_return[tag_name] = tags[tag_name]
240
+ return tags_to_return
241
+
242
+
243
def read_exif_tags_for_image(file_path,options=None):
    """
    Get relevant metadata fields (EXIF/IPTC/XMP) for an image.

    Args:
        file_path (str): image file to read
        options (ReadExifOptions, optional): parameters controlling metadata extraction

    Returns:
        dict: a dict with fields 'status' (str) and 'tags'. The exact format of 'tags'
        depends on options.processing_library:

        - For exiftool, 'tags' is a list of lists, where each element is (type/tag/value)
        - For PIL, 'tags' is a dict (str:str)
    """

    if options is None:
        options = ReadExifOptions()

    result = {'status':'unknown','tags':[]}

    if options.processing_library == 'pil':

        try:
            exif_tags = read_pil_exif(file_path,options)

        except Exception as e:
            if options.verbose:
                print('Read failure for image {}: {}'.format(
                    file_path,str(e)))
            result['status'] = 'read_failure'
            result['error'] = str(e)

        # 'status' is still 'unknown' only if the read above did not raise
        if result['status'] == 'unknown':
            if exif_tags is None:
                result['status'] = 'empty_read'
            else:
                result['status'] = 'success'
                result['tags'] = _filter_tags(exif_tags,options)

        return result

    elif options.processing_library == 'exiftool':

        # -G means "Print group name for each tag", e.g. print:
        #
        # [File] Bits Per Sample : 8
        #
        # ...instead of:
        #
        # Bits Per Sample : 8
        proc = subprocess.Popen([options.exiftool_command_name, '-G', file_path],
                                stdout=subprocess.PIPE, encoding='utf8')

        exif_lines = proc.stdout.readlines()
        exif_lines = [s.strip() for s in exif_lines]

        # If exiftool produced no output, or no line with an [EXIF] group tag,
        # treat this as a failed read.
        if ( (exif_lines is None) or (len(exif_lines) == 0) or not \
            any([s.lower().startswith('[exif]') for s in exif_lines])):
            result['status'] = 'failure'
            return result

        # A list of three-element lists (type/tag/value)
        exif_tags = []

        # line_raw = exif_lines[0]
        for line_raw in exif_lines:

            # A typical line:
            #
            # [ExifTool] ExifTool Version Number : 12.13

            line = line_raw.strip()

            # Split on the first occurrence of ":"; tag values may themselves
            # contain colons (e.g. timestamps), so only the first counts.
            tokens = line.split(':',1)
            assert(len(tokens) == 2), 'EXIF tokenization failure ({} tokens, expected 2)'.format(
                len(tokens))

            field_value = tokens[1].strip()

            # The left-hand side is "[Type] Tag Name"; split on the first run
            # of whitespace to separate the bracketed type from the tag name.
            field_name_type = tokens[0].strip()
            field_name_type_tokens = field_name_type.split(None,1)
            assert len(field_name_type_tokens) == 2, 'EXIF tokenization failure'

            field_type = field_name_type_tokens[0].strip()
            assert field_type.startswith('[') and field_type.endswith(']'), \
                'Invalid EXIF field {}'.format(field_type)
            field_type = field_type[1:-1]

            # Skip tag groups (e.g. "File", "ExifTool") that don't represent
            # image metadata
            if field_type in options.tag_types_to_ignore:
                if options.verbose:
                    print('Ignoring tag with type {}'.format(field_type))
                continue

            field_name = field_name_type_tokens[1].strip()
            if options.tags_to_exclude is not None and field_name in options.tags_to_exclude:
                continue
            if options.tags_to_include is not None and field_name not in options.tags_to_include:
                continue
            tag = [field_type,field_name,field_value]

            exif_tags.append(tag)

        # ...for each output line

        result['status'] = 'success'
        result['tags'] = exif_tags
        return result

    else:

        raise ValueError('Unknown processing library {}'.format(
            options.processing_library))

    # ...which processing library are we using?

# ...read_exif_tags_for_image()
357
+
358
+
359
def _populate_exif_data(im, image_base, options=None):
    """
    Populate EXIF data into the 'exif_tags' field in the image object [im].

    Args:
        im (dict): image object; im['file_name'] should be prepopulated,
            relative to [image_base]
        image_base (str): base folder for the relative path in im['file_name']
        options (ReadExifOptions, optional): parameters controlling metadata
            extraction

    Returns:
        dict: a modified version of [im]; also modifies [im] in place.  On
        success, only 'exif_tags' is populated; 'status' and 'error' are only
        added on failure.
    """

    if options is None:
        options = ReadExifOptions()

    fn = im['file_name']
    if options.verbose:
        print('Processing {}'.format(fn))

    try:

        # The assert is deliberately inside the try block: a missing file is
        # reported in the same way as any other per-image failure.
        file_path = os.path.join(image_base,fn)
        assert os.path.isfile(file_path), 'Could not find file {}'.format(file_path)
        result = read_exif_tags_for_image(file_path,options)
        if result['status'] == 'success':
            exif_tags = result['tags']
            im['exif_tags'] = exif_tags
        else:
            im['exif_tags'] = None
            im['status'] = result['status']
            if 'error' in result:
                im['error'] = result['error']
            if options.verbose:
                print('Error reading EXIF data for {}'.format(file_path))

    except Exception as e:

        s = 'Error on {}: {}'.format(fn,str(e))
        print(s)
        im['error'] = s
        # NOTE(review): 'read failure' (with a space) is inconsistent with the
        # 'read_failure' status used in read_exif_tags_for_image(); preserved
        # as-is because downstream consumers may match on the existing string.
        im['status'] = 'read failure'
        im['exif_tags'] = None

    return im

# ..._populate_exif_data()
402
+
403
+
404
def _create_image_objects(image_files,recursive=True):
    """
    Create empty image objects for every image in [image_files], which can be a
    list of relative paths (which will get stored without processing, so the base
    path doesn't matter here), or a folder name.

    Returns a list of dicts with field 'file_name' (a relative path).

    "recursive" is ignored if "image_files" is a list.
    """

    # If we were handed a folder rather than a list, enumerate *relative*
    # image paths inside it
    if isinstance(image_files,str):
        print('Enumerating image files in {}'.format(image_files))
        assert os.path.isdir(image_files), 'Invalid image folder {}'.format(image_files)
        image_files = find_images(image_files,
                                  recursive=recursive,
                                  return_relative_paths=True,
                                  convert_slashes=True)

    # One dict per file; only 'file_name' is populated at this stage
    images = [{'file_name': image_fn} for image_fn in image_files]

    if debug_max_images is not None:
        print('Trimming input list to {} images'.format(debug_max_images))
        images = images[:debug_max_images]

    return images
435
+
436
+
437
def _populate_exif_for_images(image_base,images,options=None):
    """
    Main worker loop: read EXIF data for each image object in [images] and
    populate the image objects.

    Args:
        image_base (str): base folder for the relative paths in [images]
        images (list): list of dicts with the field 'file_name' containing
            a relative path (relative to [image_base])
        options (ReadExifOptions, optional): parameters controlling metadata
            extraction, including worker-pool configuration

    Returns:
        list: the populated image objects
    """

    if options is None:
        options = ReadExifOptions()

    # Use <= 1 (not == 1) to disable parallelization, per the documented
    # semantics of ReadExifOptions.n_workers; Pool(0) would raise.
    if options.n_workers <= 1:

        results = []
        for im in tqdm(images):
            results.append(_populate_exif_data(im,image_base,options))

    else:

        from functools import partial
        if options.use_threads:
            print('Starting parallel thread pool with {} workers'.format(options.n_workers))
            pool = ThreadPool(options.n_workers)
        else:
            print('Starting parallel process pool with {} workers'.format(options.n_workers))
            pool = Pool(options.n_workers)

        # Make sure the pool is shut down even if a worker raises; the
        # original code leaked the pool.
        try:
            results = list(tqdm(pool.imap(partial(_populate_exif_data,image_base=image_base,
                                                  options=options),images),total=len(images)))
        finally:
            pool.close()
            pool.join()

    return results
469
+
470
+
471
+ def _write_exif_results(results,output_file):
472
+ """
473
+ Write EXIF information to [output_file].
474
+
475
+ 'results' is a list of dicts with fields 'exif_tags' and 'file_name'.
476
+
477
+ Writes to .csv or .json depending on the extension of 'output_file'.
478
+ """
479
+
480
+ if output_file.endswith('.json'):
481
+
482
+ with open(output_file,'w') as f:
483
+ json.dump(results,f,indent=1,default=str)
484
+
485
+ elif output_file.endswith('.csv'):
486
+
487
+ # Find all EXIF tags that exist in any image
488
+ all_keys = set()
489
+ for im in results:
490
+
491
+ keys_this_image = set()
492
+ exif_tags = im['exif_tags']
493
+ file_name = im['file_name']
494
+ for tag in exif_tags:
495
+ tag_name = tag[1]
496
+ assert tag_name not in keys_this_image, \
497
+ 'Error: tag {} appears twice in image {}'.format(
498
+ tag_name,file_name)
499
+ all_keys.add(tag_name)
500
+
501
+ # ...for each tag in this image
502
+
503
+ # ...for each image
504
+
505
+ all_keys = sorted(list(all_keys))
506
+
507
+ header = ['File Name']
508
+ header.extend(all_keys)
509
+
510
+ import csv
511
+ with open(output_file,'w') as csvfile:
512
+
513
+ writer = csv.writer(csvfile)
514
+
515
+ # Write header
516
+ writer.writerow(header)
517
+
518
+ for im in results:
519
+
520
+ row = [im['file_name']]
521
+ kvp_this_image = {tag[1]:tag[2] for tag in im['exif_tags']}
522
+
523
+ for i_key,key in enumerate(all_keys):
524
+ value = ''
525
+ if key in kvp_this_image:
526
+ value = kvp_this_image[key]
527
+ row.append(value)
528
+ # ...for each key that *might* be present in this image
529
+
530
+ assert len(row) == len(header)
531
+
532
+ writer.writerow(row)
533
+
534
+ # ...for each image
535
+
536
+ # ...with open()
537
+
538
+ else:
539
+
540
+ raise ValueError('Could not determine output type from file {}'.format(
541
+ output_file))
542
+
543
+ # ...if we're writing to .json/.csv
544
+
545
+ print('Wrote results to {}'.format(output_file))
546
+
547
+
548
def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=None,recursive=True):
    """
    Retrieve EXIF metadata for a folder of images (or an explicit list of files).

    Args:
        input_folder (str): folder to process; if this is None, [filenames] should be a list of absolute
            paths
        output_file (str, optional): .json or .csv file to which we should write results; if this is
            None, results are returned but not written to disk
        options (ReadExifOptions, optional): parameters controlling metadata extraction
        filenames (list, optional): allowlist of relative filenames (if [input_folder] is not None) or
            a list of absolute filenames (if [input_folder] is None)
        recursive (bool, optional): whether to recurse into [input_folder]; not relevant if
            [input_folder] is None

    Returns:
        dict: a dictionary mapping relative filenames to EXIF data, whose format depends on whether
        we're using PIL or exiftool.
    """

    if options is None:
        options = ReadExifOptions()

    # tags_to_include and tags_to_exclude are mutually exclusive; at most one may be set
    assert (options.tags_to_include is None) or (options.tags_to_exclude is None), \
        "tags_to_include and tags_to_exclude are incompatible"

    # Normalize a None folder to the empty string so downstream path joins behave
    if input_folder is None:
        input_folder = ''
    if input_folder:
        assert os.path.isdir(input_folder), \
            '{} is not a valid folder'.format(input_folder)

    assert input_folder or (filenames is not None), \
        'Must specify either a folder or a list of files'

    if output_file is not None:

        assert output_file.lower().endswith(('.json','.csv')), \
            'I only know how to write results to .json or .csv'

        # Fail early if the output file isn't writable, rather than after a long
        # extraction run.  Note that opening in append mode may create the file.
        try:
            with open(output_file, 'a') as probe:
                if not probe.writable():
                    raise IOError('File not writable')
        except Exception:
            print('Could not write to file {}'.format(output_file))
            raise

    # The exiftool path only works if the external executable is available
    if options.processing_library == 'exif':
        assert is_executable(options.exiftool_command_name), 'exiftool not available'

    # Build the list of image records, either from the caller-supplied allowlist
    # or by enumerating the folder
    if filenames is not None:
        assert isinstance(filenames,list)
        image_records = _create_image_objects(filenames)
    else:
        image_records = _create_image_objects(input_folder,recursive=recursive)

    exif_results = _populate_exif_for_images(input_folder,image_records,options)

    if output_file is not None:
        try:
            _write_exif_results(exif_results,output_file)
        except Exception as e:
            # Optionally tolerate serialization failures, since the results are
            # returned to the caller regardless
            if options.allow_write_error:
                print('Warning: error serializing EXIF data: {}'.format(str(e)))
            else:
                raise

    return exif_results
620
+
621
+
622
#%% Interactive driver
#
# Notebook-style cells for manual testing; guarded by "if False" so nothing
# here runs on import.  Run the cells individually in an interactive session.

if False:

    #%%

    # NOTE(review): with output_file left as None, results are only returned in
    # memory; the json.load cell below will fail unless output_file is set to a
    # real path first.
    input_folder = r'C:\temp\md-name-testing'
    output_file = None # r'C:\temp\md-name-testing\exif.json'
    options = ReadExifOptions()
    options.verbose = False
    options.n_workers = 10
    options.use_threads = False
    options.processing_library = 'pil'
    # NOTE(review): elsewhere in this file the exiftool path is selected with
    # processing_library == 'exif', so 'exiftool' here may be stale — verify
    # against ReadExifOptions before using.
    # options.processing_library = 'exiftool'
    options.tags_to_include = ['DateTime','Model','Make','ExifImageWidth','ExifImageHeight','DateTime','DateTimeOriginal','Orientation']
    # options.tags_to_exclude = ['MakerNote']

    results = read_exif_from_folder(input_folder,output_file,options)

    #%%

    # Re-load the serialized results for inspection (requires output_file to be
    # a real .json path, see note above in this same cell block)
    with open(output_file,'r') as f:
        d = json.load(f)
645
+
646
+
647
+ #%% Command-line driver
648
+
649
+ import argparse
650
+ import sys
651
+
652
def main():
    """
    Command-line driver: read EXIF information from all images in a folder,
    and write the results to a .json or .csv file.
    """

    options = ReadExifOptions()

    parser = argparse.ArgumentParser(description=('Read EXIF information from all images in' + \
                                     ' a folder, and write the results to .csv or .json'))

    parser.add_argument('input_folder', type=str,
                        help='Folder of images from which we should read EXIF information')
    # Bug fix: the help text previously said "(.json)", but downstream code
    # (read_exif_from_folder) explicitly accepts both .json and .csv
    parser.add_argument('output_file', type=str,
                        help='Output file (.json or .csv) to which we should write EXIF information')
    parser.add_argument('--n_workers', type=int, default=1,
                        help='Number of concurrent workers to use (defaults to 1)')
    parser.add_argument('--use_threads', action='store_true',
                        help='Use threads (instead of processes) for multitasking')
    parser.add_argument('--processing_library', type=str, default=options.processing_library,
                        help='Processing library (exif or pil)')

    # With no arguments at all, print help rather than an argparse error
    if len(sys.argv[1:]) == 0:
        parser.print_help()
        parser.exit()

    args = parser.parse_args()

    # Copy parsed arguments onto the options object; extra fields (e.g.
    # input_folder) are copied too, but are ignored downstream
    args_to_object(args, options)

    # Normalize library name so --processing_library is case-insensitive
    options.processing_library = options.processing_library.lower()

    read_exif_from_folder(args.input_folder,args.output_file,options)

if __name__ == '__main__':
    main()