megadetector 5.0.9-py3-none-any.whl → 5.0.11-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of megadetector might be problematic.

Files changed (226)
  1. {megadetector-5.0.9.dist-info → megadetector-5.0.11.dist-info}/LICENSE +0 -0
  2. {megadetector-5.0.9.dist-info → megadetector-5.0.11.dist-info}/METADATA +12 -11
  3. megadetector-5.0.11.dist-info/RECORD +5 -0
  4. megadetector-5.0.11.dist-info/top_level.txt +1 -0
  5. api/__init__.py +0 -0
  6. api/batch_processing/__init__.py +0 -0
  7. api/batch_processing/api_core/__init__.py +0 -0
  8. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  9. api/batch_processing/api_core/batch_service/score.py +0 -439
  10. api/batch_processing/api_core/server.py +0 -294
  11. api/batch_processing/api_core/server_api_config.py +0 -98
  12. api/batch_processing/api_core/server_app_config.py +0 -55
  13. api/batch_processing/api_core/server_batch_job_manager.py +0 -220
  14. api/batch_processing/api_core/server_job_status_table.py +0 -152
  15. api/batch_processing/api_core/server_orchestration.py +0 -360
  16. api/batch_processing/api_core/server_utils.py +0 -92
  17. api/batch_processing/api_core_support/__init__.py +0 -0
  18. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
  19. api/batch_processing/api_support/__init__.py +0 -0
  20. api/batch_processing/api_support/summarize_daily_activity.py +0 -152
  21. api/batch_processing/data_preparation/__init__.py +0 -0
  22. api/batch_processing/data_preparation/manage_local_batch.py +0 -2391
  23. api/batch_processing/data_preparation/manage_video_batch.py +0 -327
  24. api/batch_processing/integration/digiKam/setup.py +0 -6
  25. api/batch_processing/integration/digiKam/xmp_integration.py +0 -465
  26. api/batch_processing/integration/eMammal/test_scripts/config_template.py +0 -5
  27. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -126
  28. api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +0 -55
  29. api/batch_processing/postprocessing/__init__.py +0 -0
  30. api/batch_processing/postprocessing/add_max_conf.py +0 -64
  31. api/batch_processing/postprocessing/categorize_detections_by_size.py +0 -163
  32. api/batch_processing/postprocessing/combine_api_outputs.py +0 -249
  33. api/batch_processing/postprocessing/compare_batch_results.py +0 -958
  34. api/batch_processing/postprocessing/convert_output_format.py +0 -397
  35. api/batch_processing/postprocessing/load_api_results.py +0 -195
  36. api/batch_processing/postprocessing/md_to_coco.py +0 -310
  37. api/batch_processing/postprocessing/md_to_labelme.py +0 -330
  38. api/batch_processing/postprocessing/merge_detections.py +0 -401
  39. api/batch_processing/postprocessing/postprocess_batch_results.py +0 -1904
  40. api/batch_processing/postprocessing/remap_detection_categories.py +0 -170
  41. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +0 -661
  42. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +0 -211
  43. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +0 -82
  44. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +0 -1631
  45. api/batch_processing/postprocessing/separate_detections_into_folders.py +0 -731
  46. api/batch_processing/postprocessing/subset_json_detector_output.py +0 -696
  47. api/batch_processing/postprocessing/top_folders_to_bottom.py +0 -223
  48. api/synchronous/__init__.py +0 -0
  49. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  50. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -152
  51. api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -266
  52. api/synchronous/api_core/animal_detection_api/config.py +0 -35
  53. api/synchronous/api_core/animal_detection_api/data_management/annotations/annotation_constants.py +0 -47
  54. api/synchronous/api_core/animal_detection_api/detection/detector_training/copy_checkpoints.py +0 -43
  55. api/synchronous/api_core/animal_detection_api/detection/detector_training/model_main_tf2.py +0 -114
  56. api/synchronous/api_core/animal_detection_api/detection/process_video.py +0 -543
  57. api/synchronous/api_core/animal_detection_api/detection/pytorch_detector.py +0 -304
  58. api/synchronous/api_core/animal_detection_api/detection/run_detector.py +0 -627
  59. api/synchronous/api_core/animal_detection_api/detection/run_detector_batch.py +0 -1029
  60. api/synchronous/api_core/animal_detection_api/detection/run_inference_with_yolov5_val.py +0 -581
  61. api/synchronous/api_core/animal_detection_api/detection/run_tiled_inference.py +0 -754
  62. api/synchronous/api_core/animal_detection_api/detection/tf_detector.py +0 -165
  63. api/synchronous/api_core/animal_detection_api/detection/video_utils.py +0 -495
  64. api/synchronous/api_core/animal_detection_api/md_utils/azure_utils.py +0 -174
  65. api/synchronous/api_core/animal_detection_api/md_utils/ct_utils.py +0 -262
  66. api/synchronous/api_core/animal_detection_api/md_utils/directory_listing.py +0 -251
  67. api/synchronous/api_core/animal_detection_api/md_utils/matlab_porting_tools.py +0 -97
  68. api/synchronous/api_core/animal_detection_api/md_utils/path_utils.py +0 -416
  69. api/synchronous/api_core/animal_detection_api/md_utils/process_utils.py +0 -110
  70. api/synchronous/api_core/animal_detection_api/md_utils/sas_blob_utils.py +0 -509
  71. api/synchronous/api_core/animal_detection_api/md_utils/string_utils.py +0 -59
  72. api/synchronous/api_core/animal_detection_api/md_utils/url_utils.py +0 -144
  73. api/synchronous/api_core/animal_detection_api/md_utils/write_html_image_list.py +0 -226
  74. api/synchronous/api_core/animal_detection_api/md_visualization/visualization_utils.py +0 -841
  75. api/synchronous/api_core/tests/__init__.py +0 -0
  76. api/synchronous/api_core/tests/load_test.py +0 -110
  77. classification/__init__.py +0 -0
  78. classification/aggregate_classifier_probs.py +0 -108
  79. classification/analyze_failed_images.py +0 -227
  80. classification/cache_batchapi_outputs.py +0 -198
  81. classification/create_classification_dataset.py +0 -627
  82. classification/crop_detections.py +0 -516
  83. classification/csv_to_json.py +0 -226
  84. classification/detect_and_crop.py +0 -855
  85. classification/efficientnet/__init__.py +0 -9
  86. classification/efficientnet/model.py +0 -415
  87. classification/efficientnet/utils.py +0 -610
  88. classification/evaluate_model.py +0 -520
  89. classification/identify_mislabeled_candidates.py +0 -152
  90. classification/json_to_azcopy_list.py +0 -63
  91. classification/json_validator.py +0 -695
  92. classification/map_classification_categories.py +0 -276
  93. classification/merge_classification_detection_output.py +0 -506
  94. classification/prepare_classification_script.py +0 -194
  95. classification/prepare_classification_script_mc.py +0 -228
  96. classification/run_classifier.py +0 -286
  97. classification/save_mislabeled.py +0 -110
  98. classification/train_classifier.py +0 -825
  99. classification/train_classifier_tf.py +0 -724
  100. classification/train_utils.py +0 -322
  101. data_management/__init__.py +0 -0
  102. data_management/annotations/__init__.py +0 -0
  103. data_management/annotations/annotation_constants.py +0 -34
  104. data_management/camtrap_dp_to_coco.py +0 -238
  105. data_management/cct_json_utils.py +0 -395
  106. data_management/cct_to_md.py +0 -176
  107. data_management/cct_to_wi.py +0 -289
  108. data_management/coco_to_labelme.py +0 -272
  109. data_management/coco_to_yolo.py +0 -662
  110. data_management/databases/__init__.py +0 -0
  111. data_management/databases/add_width_and_height_to_db.py +0 -33
  112. data_management/databases/combine_coco_camera_traps_files.py +0 -206
  113. data_management/databases/integrity_check_json_db.py +0 -477
  114. data_management/databases/subset_json_db.py +0 -115
  115. data_management/generate_crops_from_cct.py +0 -149
  116. data_management/get_image_sizes.py +0 -188
  117. data_management/importers/add_nacti_sizes.py +0 -52
  118. data_management/importers/add_timestamps_to_icct.py +0 -79
  119. data_management/importers/animl_results_to_md_results.py +0 -158
  120. data_management/importers/auckland_doc_test_to_json.py +0 -372
  121. data_management/importers/auckland_doc_to_json.py +0 -200
  122. data_management/importers/awc_to_json.py +0 -189
  123. data_management/importers/bellevue_to_json.py +0 -273
  124. data_management/importers/cacophony-thermal-importer.py +0 -796
  125. data_management/importers/carrizo_shrubfree_2018.py +0 -268
  126. data_management/importers/carrizo_trail_cam_2017.py +0 -287
  127. data_management/importers/cct_field_adjustments.py +0 -57
  128. data_management/importers/channel_islands_to_cct.py +0 -913
  129. data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  130. data_management/importers/eMammal/eMammal_helpers.py +0 -249
  131. data_management/importers/eMammal/make_eMammal_json.py +0 -223
  132. data_management/importers/ena24_to_json.py +0 -275
  133. data_management/importers/filenames_to_json.py +0 -385
  134. data_management/importers/helena_to_cct.py +0 -282
  135. data_management/importers/idaho-camera-traps.py +0 -1407
  136. data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  137. data_management/importers/jb_csv_to_json.py +0 -150
  138. data_management/importers/mcgill_to_json.py +0 -250
  139. data_management/importers/missouri_to_json.py +0 -489
  140. data_management/importers/nacti_fieldname_adjustments.py +0 -79
  141. data_management/importers/noaa_seals_2019.py +0 -181
  142. data_management/importers/pc_to_json.py +0 -365
  143. data_management/importers/plot_wni_giraffes.py +0 -123
  144. data_management/importers/prepare-noaa-fish-data-for-lila.py +0 -359
  145. data_management/importers/prepare_zsl_imerit.py +0 -131
  146. data_management/importers/rspb_to_json.py +0 -356
  147. data_management/importers/save_the_elephants_survey_A.py +0 -320
  148. data_management/importers/save_the_elephants_survey_B.py +0 -332
  149. data_management/importers/snapshot_safari_importer.py +0 -758
  150. data_management/importers/snapshot_safari_importer_reprise.py +0 -665
  151. data_management/importers/snapshot_serengeti_lila.py +0 -1067
  152. data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  153. data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  154. data_management/importers/sulross_get_exif.py +0 -65
  155. data_management/importers/timelapse_csv_set_to_json.py +0 -490
  156. data_management/importers/ubc_to_json.py +0 -399
  157. data_management/importers/umn_to_json.py +0 -507
  158. data_management/importers/wellington_to_json.py +0 -263
  159. data_management/importers/wi_to_json.py +0 -441
  160. data_management/importers/zamba_results_to_md_results.py +0 -181
  161. data_management/labelme_to_coco.py +0 -548
  162. data_management/labelme_to_yolo.py +0 -272
  163. data_management/lila/__init__.py +0 -0
  164. data_management/lila/add_locations_to_island_camera_traps.py +0 -97
  165. data_management/lila/add_locations_to_nacti.py +0 -147
  166. data_management/lila/create_lila_blank_set.py +0 -557
  167. data_management/lila/create_lila_test_set.py +0 -151
  168. data_management/lila/create_links_to_md_results_files.py +0 -106
  169. data_management/lila/download_lila_subset.py +0 -177
  170. data_management/lila/generate_lila_per_image_labels.py +0 -515
  171. data_management/lila/get_lila_annotation_counts.py +0 -170
  172. data_management/lila/get_lila_image_counts.py +0 -111
  173. data_management/lila/lila_common.py +0 -300
  174. data_management/lila/test_lila_metadata_urls.py +0 -132
  175. data_management/ocr_tools.py +0 -874
  176. data_management/read_exif.py +0 -681
  177. data_management/remap_coco_categories.py +0 -84
  178. data_management/remove_exif.py +0 -66
  179. data_management/resize_coco_dataset.py +0 -189
  180. data_management/wi_download_csv_to_coco.py +0 -246
  181. data_management/yolo_output_to_md_output.py +0 -441
  182. data_management/yolo_to_coco.py +0 -676
  183. detection/__init__.py +0 -0
  184. detection/detector_training/__init__.py +0 -0
  185. detection/detector_training/model_main_tf2.py +0 -114
  186. detection/process_video.py +0 -703
  187. detection/pytorch_detector.py +0 -337
  188. detection/run_detector.py +0 -779
  189. detection/run_detector_batch.py +0 -1219
  190. detection/run_inference_with_yolov5_val.py +0 -917
  191. detection/run_tiled_inference.py +0 -935
  192. detection/tf_detector.py +0 -188
  193. detection/video_utils.py +0 -606
  194. docs/source/conf.py +0 -43
  195. md_utils/__init__.py +0 -0
  196. md_utils/azure_utils.py +0 -174
  197. md_utils/ct_utils.py +0 -612
  198. md_utils/directory_listing.py +0 -246
  199. md_utils/md_tests.py +0 -968
  200. md_utils/path_utils.py +0 -1044
  201. md_utils/process_utils.py +0 -157
  202. md_utils/sas_blob_utils.py +0 -509
  203. md_utils/split_locations_into_train_val.py +0 -228
  204. md_utils/string_utils.py +0 -92
  205. md_utils/url_utils.py +0 -323
  206. md_utils/write_html_image_list.py +0 -225
  207. md_visualization/__init__.py +0 -0
  208. md_visualization/plot_utils.py +0 -293
  209. md_visualization/render_images_with_thumbnails.py +0 -275
  210. md_visualization/visualization_utils.py +0 -1537
  211. md_visualization/visualize_db.py +0 -551
  212. md_visualization/visualize_detector_output.py +0 -406
  213. megadetector-5.0.9.dist-info/RECORD +0 -224
  214. megadetector-5.0.9.dist-info/top_level.txt +0 -8
  215. taxonomy_mapping/__init__.py +0 -0
  216. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +0 -491
  217. taxonomy_mapping/map_new_lila_datasets.py +0 -154
  218. taxonomy_mapping/prepare_lila_taxonomy_release.py +0 -142
  219. taxonomy_mapping/preview_lila_taxonomy.py +0 -591
  220. taxonomy_mapping/retrieve_sample_image.py +0 -71
  221. taxonomy_mapping/simple_image_download.py +0 -218
  222. taxonomy_mapping/species_lookup.py +0 -834
  223. taxonomy_mapping/taxonomy_csv_checker.py +0 -159
  224. taxonomy_mapping/taxonomy_graph.py +0 -346
  225. taxonomy_mapping/validate_lila_category_mappings.py +0 -83
  226. {megadetector-5.0.9.dist-info → megadetector-5.0.11.dist-info}/WHEEL +0 -0
data_management/read_exif.py
@@ -1,681 +0,0 @@
- """
-
- read_exif.py
-
- Given a folder of images, reads relevant metadata (EXIF/IPTC/XMP) fields from all images,
- and writes them to a .json or .csv file.
-
- This module can use either PIL (which can only reliably read EXIF data) or exiftool (which
- can read everything). The latter approach expects that exiftool is available on the system
- path. No attempt is made to be consistent in format across the two approaches.
-
- """
-
- #%% Imports and constants
-
- import os
- import subprocess
- import json
- from datetime import datetime
-
- from multiprocessing.pool import ThreadPool as ThreadPool
- from multiprocessing.pool import Pool as Pool
-
- from tqdm import tqdm
- from PIL import Image, ExifTags
-
- from md_utils.path_utils import find_images, is_executable
- from md_utils.ct_utils import args_to_object
-
- debug_max_images = None
-
-
- #%% Options
-
- class ReadExifOptions:
-     """
-     Parameters controlling metadata extraction.
-     """
-
-     #: Enable additional debug console output
-     verbose = False
-
-     #: If this is True and an output file is specified for read_exif_from_folder,
-     #: and we encounter a serialization issue, we'll return the results but won't
-     #: error.
-     allow_write_error = False
-
-     #: Number of concurrent workers, set to <= 1 to disable parallelization
-     n_workers = 1
-
-     #: Should we use threads (vs. processes) for parallelization?
-     #:
-     #: Not relevant if n_workers is <= 1.
-     use_threads = True
-
-     #: "File" and "ExifTool" are tag types used by ExifTool to report data that
-     #: doesn't come from EXIF, rather from the file (e.g. file size).
-     tag_types_to_ignore = set(['File','ExifTool'])
-
-     #: Include/exclude specific tags (tags_to_include and tags_to_exclude are mutually incompatible)
-     #:
-     #: A useful set of tags one might want to limit queries for:
-     #:
-     #: options.tags_to_include = ['DateTime','Model','Make','ExifImageWidth','ExifImageHeight','DateTime',
-     #:     'DateTimeOriginal','Orientation']
-     tags_to_include = None
-
-     #: Include/exclude specific tags (tags_to_include and tags_to_exclude are mutually incompatible)
-     tags_to_exclude = None
-
-     #: The command line to invoke if using exiftool, can be an absolute path to exiftool.exe, or
-     #: can be just "exiftool", in which case it should be on your system path.
-     exiftool_command_name = 'exiftool'
-
-     #: How should we handle byte-formatted EXIF tags?
-     #:
-     #: 'convert_to_string': convert to a Python string
-     #: 'delete': don't include at all
-     #: 'raw': include as a byte string
-     byte_handling = 'convert_to_string' # 'convert_to_string','delete','raw'
-
-     #: Should we use exiftool or PIL?
-     processing_library = 'pil' # 'exiftool','pil'
-
-
- #%% Functions
-
- def _get_exif_ifd(exif):
-     """
-     Read EXIF data by finding the EXIF offset and reading tags directly
-
-     https://github.com/python-pillow/Pillow/issues/5863
-     """
-
-     # Find the offset for all the EXIF information
-     for key, value in ExifTags.TAGS.items():
-         if value == "ExifOffset":
-             break
-     info = exif.get_ifd(key)
-     return {
-         ExifTags.TAGS.get(key, key): value
-         for key, value in info.items()
-     }
-
-
- def read_pil_exif(im,options=None):
-     """
-     Read all the EXIF data we know how to read from an image, using PIL. This is primarily
-     an internal function; the main entry point for single-image EXIF information is
-     read_exif_tags_for_image().
-
-     Args:
-         im (str or PIL.Image.Image): image (as a filename or an Image object) from which
-             we should read EXIF data.
-
-     Returns:
-         dict: a dictionary mapping EXIF tag names to their values
-     """
-
-     if options is None:
-         options = ReadExifOptions()
-
-     image_name = '[image]'
-     if isinstance(im,str):
-         image_name = im
-         im = Image.open(im)
-
-     exif_tags = {}
-     try:
-         exif_info = im.getexif()
-     except Exception:
-         exif_info = None
-
-     if exif_info is None:
-         return exif_tags
-
-     for k, v in exif_info.items():
-         assert isinstance(k,str) or isinstance(k,int), \
-             'Invalid EXIF key {}'.format(str(k))
-         if k in ExifTags.TAGS:
-             exif_tags[ExifTags.TAGS[k]] = str(v)
-         else:
-             # print('Warning: unrecognized EXIF tag: {}'.format(k))
-             exif_tags[k] = str(v)
-
-     exif_ifd_tags = _get_exif_ifd(exif_info)
-
-     for k in exif_ifd_tags.keys():
-         v = exif_ifd_tags[k]
-         if k in exif_tags:
-             if options.verbose:
-                 print('Warning: redundant EXIF values for {} in {}:\n{}\n{}'.format(
-                     k,image_name,exif_tags[k],v))
-         else:
-             exif_tags[k] = v
-
-     exif_tag_names = list(exif_tags.keys())
-
-     # Type conversion and cleanup
-     #
-     # Most quirky types will get serialized to string when we write to .json.
-     for k in exif_tag_names:
-
-         if isinstance(exif_tags[k],bytes):
-
-             if options.byte_handling == 'delete':
-                 del exif_tags[k]
-             elif options.byte_handling == 'raw':
-                 pass
-             else:
-                 assert options.byte_handling == 'convert_to_string'
-                 exif_tags[k] = str(exif_tags[k])
-
-         elif isinstance(exif_tags[k],str):
-
-             exif_tags[k] = exif_tags[k].strip()
-
-     return exif_tags
-
- # ...read_pil_exif()
-
-
- def format_datetime_as_exif_datetime_string(dt):
-     """
-     Renders a Python datetime object as a string, using the standard EXIF datetime
-     format ('%Y:%m:%d %H:%M:%S').
-     """
-
-     return datetime.strftime(dt, '%Y:%m:%d %H:%M:%S')
-
-
- def parse_exif_datetime_string(s,verbose=False):
-     """
-     Exif datetimes are strings, but in a standard format:
-
-     %Y:%m:%d %H:%M:%S
-
-     Parses one of those strings into a Python datetime object.
-
-     Args:
-         s (str): datetime string to parse, should be in standard EXIF datetime format
-         verbose (bool, optional): enable additional debug output
-
-     Returns:
-         datetime: the datetime object created from [s]
-     """
-
-     dt = None
-     try:
-         dt = datetime.strptime(s, '%Y:%m:%d %H:%M:%S')
-     except Exception:
-         if verbose:
-             print('Warning: could not parse datetime {}'.format(str(s)))
-     return dt
-
-
- def _filter_tags(tags,options):
-     """
-     Internal function used to include/exclude specific tags from the exif_tags
-     dict.
-     """
-
-     if options is None:
-         return tags
-     if options.tags_to_include is None and options.tags_to_exclude is None:
-         return tags
-     if options.tags_to_include is not None:
-         assert options.tags_to_exclude is None, "tags_to_include and tags_to_exclude are incompatible"
-         tags_to_return = {}
-         for tag_name in tags.keys():
-             if tag_name in options.tags_to_include:
-                 tags_to_return[tag_name] = tags[tag_name]
-         return tags_to_return
-     if options.tags_to_exclude is not None:
-         assert options.tags_to_include is None, "tags_to_include and tags_to_exclude are incompatible"
-         tags_to_return = {}
-         for tag_name in tags.keys():
-             if tag_name not in options.tags_to_exclude:
-                 tags_to_return[tag_name] = tags[tag_name]
-         return tags_to_return
-
-
- def read_exif_tags_for_image(file_path,options=None):
-     """
-     Get relevant fields from EXIF data for an image
-
-     Args:
-         file_path (str): image file to read EXIF data from
-         options (ReadExifOptions, optional): parameters controlling metadata extraction
-
-     Returns:
-         dict: a dict with fields 'status' (str) and 'tags'. The exact format of 'tags' depends on
-         options.processing_library:
-
-         - For exiftool, 'tags' is a list of lists, where each element is (type/tag/value)
-         - For PIL, 'tags' is a dict (str:str)
-     """
-
-     if options is None:
-         options = ReadExifOptions()
-
-     result = {'status':'unknown','tags':[]}
-
-     if options.processing_library == 'pil':
-
-         try:
-             exif_tags = read_pil_exif(file_path,options)
-
-         except Exception as e:
-             if options.verbose:
-                 print('Read failure for image {}: {}'.format(
-                     file_path,str(e)))
-             result['status'] = 'read_failure'
-             result['error'] = str(e)
-
-         if result['status'] == 'unknown':
-             if exif_tags is None:
-                 result['status'] = 'empty_read'
-             else:
-                 result['status'] = 'success'
-                 result['tags'] = _filter_tags(exif_tags,options)
-
-         return result
-
-     elif options.processing_library == 'exiftool':
-
-         # -G means "Print group name for each tag", e.g. print:
-         #
-         # [File] Bits Per Sample : 8
-         #
-         # ...instead of:
-         #
-         # Bits Per Sample : 8
-         proc = subprocess.Popen([options.exiftool_command_name, '-G', file_path],
-                                 stdout=subprocess.PIPE, encoding='utf8')
-
-         exif_lines = proc.stdout.readlines()
-         exif_lines = [s.strip() for s in exif_lines]
-         if ( (exif_lines is None) or (len(exif_lines) == 0) or not \
-             any([s.lower().startswith('[exif]') for s in exif_lines])):
-             result['status'] = 'failure'
-             return result
-
-         # A list of three-element lists (type/tag/value)
-         exif_tags = []
-
-         # line_raw = exif_lines[0]
-         for line_raw in exif_lines:
-
-             # A typical line:
-             #
-             # [ExifTool] ExifTool Version Number : 12.13
-
-             line = line_raw.strip()
-
-             # Split on the first occurrence of ":"
-             tokens = line.split(':',1)
-             assert(len(tokens) == 2), 'EXIF tokenization failure ({} tokens, expected 2)'.format(
-                 len(tokens))
-
-             field_value = tokens[1].strip()
-
-             field_name_type = tokens[0].strip()
-             field_name_type_tokens = field_name_type.split(None,1)
-             assert len(field_name_type_tokens) == 2, 'EXIF tokenization failure'
-
-             field_type = field_name_type_tokens[0].strip()
-             assert field_type.startswith('[') and field_type.endswith(']'), \
-                 'Invalid EXIF field {}'.format(field_type)
-             field_type = field_type[1:-1]
-
-             if field_type in options.tag_types_to_ignore:
-                 if options.verbose:
-                     print('Ignoring tag with type {}'.format(field_type))
-                 continue
-
-             field_name = field_name_type_tokens[1].strip()
-             if options.tags_to_exclude is not None and field_name in options.tags_to_exclude:
-                 continue
-             if options.tags_to_include is not None and field_name not in options.tags_to_include:
-                 continue
-             tag = [field_type,field_name,field_value]
-
-             exif_tags.append(tag)
-
-         # ...for each output line
-
-         result['status'] = 'success'
-         result['tags'] = exif_tags
-         return result
-
-     else:
-
-         raise ValueError('Unknown processing library {}'.format(
-             options.processing_library))
-
-     # ...which processing library are we using?
-
- # ...read_exif_tags_for_image()
-
-
- def _populate_exif_data(im, image_base, options=None):
-     """
-     Populate EXIF data into the 'exif_tags' field in the image object [im].
-
-     im['file_name'] should be prepopulated, relative to image_base.
-
-     Returns a modified version of [im], also modifies [im] in place.
-     """
-
-     if options is None:
-         options = ReadExifOptions()
-
-     fn = im['file_name']
-     if options.verbose:
-         print('Processing {}'.format(fn))
-
-     try:
-
-         file_path = os.path.join(image_base,fn)
-         assert os.path.isfile(file_path), 'Could not find file {}'.format(file_path)
-         result = read_exif_tags_for_image(file_path,options)
-         if result['status'] == 'success':
-             exif_tags = result['tags']
-             im['exif_tags'] = exif_tags
-         else:
-             im['exif_tags'] = None
-             im['status'] = result['status']
-             if 'error' in result:
-                 im['error'] = result['error']
-             if options.verbose:
-                 print('Error reading EXIF data for {}'.format(file_path))
-
-     except Exception as e:
-
-         s = 'Error on {}: {}'.format(fn,str(e))
-         print(s)
-         im['error'] = s
-         im['status'] = 'read failure'
-         im['exif_tags'] = None
-
-     return im
-
- # ..._populate_exif_data()
-
-
- def _create_image_objects(image_files,recursive=True):
-     """
-     Create empty image objects for every image in [image_files], which can be a
-     list of relative paths (which will get stored without processing, so the base
-     path doesn't matter here), or a folder name.
-
-     Returns a list of dicts with field 'file_name' (a relative path).
-
-     "recursive" is ignored if "image_files" is a list.
-     """
-
-     # Enumerate *relative* paths
-     if isinstance(image_files,str):
-         print('Enumerating image files in {}'.format(image_files))
-         assert os.path.isdir(image_files), 'Invalid image folder {}'.format(image_files)
-         image_files = find_images(image_files,
-                                   recursive=recursive,
-                                   return_relative_paths=True,
-                                   convert_slashes=True)
-
-     images = []
-     for fn in image_files:
-         im = {}
-         im['file_name'] = fn
-         images.append(im)
-
-     if debug_max_images is not None:
-         print('Trimming input list to {} images'.format(debug_max_images))
-         images = images[0:debug_max_images]
-
-     return images
-
-
- def _populate_exif_for_images(image_base,images,options=None):
-     """
-     Main worker loop: read EXIF data for each image object in [images] and
-     populate the image objects.
-
-     'images' should be a list of dicts with the field 'file_name' containing
-     a relative path (relative to 'image_base').
-     """
-
-     if options is None:
-         options = ReadExifOptions()
-
-     if options.n_workers == 1:
-
-         results = []
-         for im in tqdm(images):
-             results.append(_populate_exif_data(im,image_base,options))
-
-     else:
-
-         from functools import partial
-         if options.use_threads:
-             print('Starting parallel thread pool with {} workers'.format(options.n_workers))
-             pool = ThreadPool(options.n_workers)
-         else:
-             print('Starting parallel process pool with {} workers'.format(options.n_workers))
-             pool = Pool(options.n_workers)
-
-         results = list(tqdm(pool.imap(partial(_populate_exif_data,image_base=image_base,
-                                               options=options),images),total=len(images)))
-
-     return results
-
-
- def _write_exif_results(results,output_file):
-     """
-     Write EXIF information to [output_file].
-
-     'results' is a list of dicts with fields 'exif_tags' and 'file_name'.
-
-     Writes to .csv or .json depending on the extension of 'output_file'.
-     """
-
-     if output_file.endswith('.json'):
-
-         with open(output_file,'w') as f:
-             json.dump(results,f,indent=1,default=str)
-
-     elif output_file.endswith('.csv'):
-
-         # Find all EXIF tags that exist in any image
-         all_keys = set()
-         for im in results:
-
-             keys_this_image = set()
-             exif_tags = im['exif_tags']
-             file_name = im['file_name']
-             for tag in exif_tags:
-                 tag_name = tag[1]
-                 assert tag_name not in keys_this_image, \
-                     'Error: tag {} appears twice in image {}'.format(
-                         tag_name,file_name)
-                 keys_this_image.add(tag_name)
-                 all_keys.add(tag_name)
-
-             # ...for each tag in this image
-
-         # ...for each image
-
-         all_keys = sorted(list(all_keys))
-
-         header = ['File Name']
-         header.extend(all_keys)
-
-         import csv
-         with open(output_file,'w') as csvfile:
-
-             writer = csv.writer(csvfile)
-
-             # Write header
-             writer.writerow(header)
-
-             for im in results:
-
-                 row = [im['file_name']]
-                 kvp_this_image = {tag[1]:tag[2] for tag in im['exif_tags']}
-
-                 for i_key,key in enumerate(all_keys):
-                     value = ''
-                     if key in kvp_this_image:
-                         value = kvp_this_image[key]
-                     row.append(value)
-                 # ...for each key that *might* be present in this image
-
-                 assert len(row) == len(header)
-
-                 writer.writerow(row)
-
-             # ...for each image
-
-         # ...with open()
-
-     else:
-
-         raise ValueError('Could not determine output type from file {}'.format(
-             output_file))
-
-     # ...if we're writing to .json/.csv
-
-     print('Wrote results to {}'.format(output_file))
-
-
- def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=None,recursive=True):
-     """
-     Read EXIF data for a folder of images.
-
-     Args:
-         input_folder (str): folder to process; if this is None, [filenames] should be a list of absolute
-             paths
-         output_file (str, optional): .json or .csv file to which we should write results; if this is
-             None, results are returned but not written to disk
-         options (ReadExifOptions, optional): parameters controlling metadata extraction
-         filenames (list, optional): allowlist of relative filenames (if [input_folder] is not None) or
-             a list of absolute filenames (if [input_folder] is None)
-         recursive (bool, optional): whether to recurse into [input_folder], not relevant if [input_folder]
-             is None.
-
-     Returns:
-         list: a list of dicts (one per image), each with a 'file_name' field and an 'exif_tags' field
-             whose format depends on whether we're using PIL or exiftool.
-     """
-
-     if options is None:
-         options = ReadExifOptions()
-
-     # Validate options
-     if options.tags_to_include is not None:
-         assert options.tags_to_exclude is None, "tags_to_include and tags_to_exclude are incompatible"
-     if options.tags_to_exclude is not None:
-         assert options.tags_to_include is None, "tags_to_include and tags_to_exclude are incompatible"
-
-     if input_folder is None:
-         input_folder = ''
-     if len(input_folder) > 0:
-         assert os.path.isdir(input_folder), \
-             '{} is not a valid folder'.format(input_folder)
-
-     assert (len(input_folder) > 0) or (filenames is not None), \
-         'Must specify either a folder or a list of files'
-
-     if output_file is not None:
-
-         assert output_file.lower().endswith('.json') or output_file.lower().endswith('.csv'), \
-             'I only know how to write results to .json or .csv'
-
-         try:
-             with open(output_file, 'a') as f:
-                 if not f.writable():
-                     raise IOError('File not writable')
-         except Exception:
-             print('Could not write to file {}'.format(output_file))
-             raise
-
-     if options.processing_library == 'exiftool':
-         assert is_executable(options.exiftool_command_name), 'exiftool not available'
-
-     if filenames is None:
-         images = _create_image_objects(input_folder,recursive=recursive)
-     else:
-         assert isinstance(filenames,list)
-         images = _create_image_objects(filenames)
-
-     results = _populate_exif_for_images(input_folder,images,options)
-
-     if output_file is not None:
-         try:
-             _write_exif_results(results,output_file)
-         except Exception as e:
-             if not options.allow_write_error:
-                 raise
-             else:
-                 print('Warning: error serializing EXIF data: {}'.format(str(e)))
-
-     return results
-
-
- #%% Interactive driver
-
- if False:
-
-     #%%
-
-     input_folder = r'C:\temp\md-name-testing'
-     output_file = None # r'C:\temp\md-name-testing\exif.json'
-     options = ReadExifOptions()
-     options.verbose = False
-     options.n_workers = 10
-     options.use_threads = False
-     options.processing_library = 'pil'
-     # options.processing_library = 'exiftool'
-     options.tags_to_include = ['DateTime','Model','Make','ExifImageWidth','ExifImageHeight','DateTime','DateTimeOriginal','Orientation']
-     # options.tags_to_exclude = ['MakerNote']
-
-     results = read_exif_from_folder(input_folder,output_file,options)
-
-     #%%
-
-     with open(output_file,'r') as f:
-         d = json.load(f)
-
-
- #%% Command-line driver
-
- import argparse
- import sys
-
- def main():
-
-     options = ReadExifOptions()
-
-     parser = argparse.ArgumentParser(description=('Read EXIF information from all images in' + \
-         ' a folder, and write the results to .csv or .json'))
-
-     parser.add_argument('input_folder', type=str,
-                         help='Folder of images from which we should read EXIF information')
-     parser.add_argument('output_file', type=str,
-                         help='Output file (.json or .csv) to which we should write EXIF information')
-     parser.add_argument('--n_workers', type=int, default=1,
-                         help='Number of concurrent workers to use (defaults to 1)')
-     parser.add_argument('--use_threads', action='store_true',
-                         help='Use threads (instead of processes) for multitasking')
-     parser.add_argument('--processing_library', type=str, default=options.processing_library,
-                         help='Processing library (exiftool or pil)')
-
-     if len(sys.argv[1:]) == 0:
-         parser.print_help()
-         parser.exit()
-
-     args = parser.parse_args()
-     args_to_object(args, options)
-     options.processing_library = options.processing_library.lower()
-
-     read_exif_from_folder(args.input_folder,args.output_file,options)
-
- if __name__ == '__main__':
-     main()
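
For reference, the main entry point of the deleted module is read_exif_from_folder(). The sketch below is adapted from the module's own interactive driver; it assumes the 5.0.9 package layout (in which the module was importable as data_management.read_exif) and uses hypothetical input/output paths and an illustrative tag allowlist.

from data_management.read_exif import ReadExifOptions, read_exif_from_folder

options = ReadExifOptions()
options.n_workers = 4               # parallelize EXIF reads across 4 workers
options.use_threads = True          # use threads rather than processes
options.processing_library = 'pil'  # or 'exiftool', if exiftool is on the system path
options.tags_to_include = ['DateTime','DateTimeOriginal','Make','Model','Orientation']

# Reads EXIF data for every image under input_folder (recursively by default)
# and writes one record per image to the output file; results are also returned.
results = read_exif_from_folder(input_folder='/path/to/images',
                                output_file='exif_info.json',
                                options=options)

The same operation should also be reachable from the command line via the module's main() entry point, e.g. python -m data_management.read_exif /path/to/images exif_info.json --n_workers 4.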