megadetector 5.0.27__py3-none-any.whl → 5.0.29__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of megadetector might be problematic. Click here for more details.
- megadetector/api/batch_processing/api_core/batch_service/score.py +4 -5
- megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +1 -1
- megadetector/api/batch_processing/api_support/summarize_daily_activity.py +1 -1
- megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
- megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
- megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
- megadetector/api/synchronous/api_core/tests/load_test.py +2 -3
- megadetector/classification/aggregate_classifier_probs.py +3 -3
- megadetector/classification/analyze_failed_images.py +5 -5
- megadetector/classification/cache_batchapi_outputs.py +5 -5
- megadetector/classification/create_classification_dataset.py +11 -12
- megadetector/classification/crop_detections.py +10 -10
- megadetector/classification/csv_to_json.py +8 -8
- megadetector/classification/detect_and_crop.py +13 -15
- megadetector/classification/evaluate_model.py +7 -7
- megadetector/classification/identify_mislabeled_candidates.py +6 -6
- megadetector/classification/json_to_azcopy_list.py +1 -1
- megadetector/classification/json_validator.py +29 -32
- megadetector/classification/map_classification_categories.py +9 -9
- megadetector/classification/merge_classification_detection_output.py +12 -9
- megadetector/classification/prepare_classification_script.py +19 -19
- megadetector/classification/prepare_classification_script_mc.py +23 -23
- megadetector/classification/run_classifier.py +4 -4
- megadetector/classification/save_mislabeled.py +6 -6
- megadetector/classification/train_classifier.py +1 -1
- megadetector/classification/train_classifier_tf.py +9 -9
- megadetector/classification/train_utils.py +10 -10
- megadetector/data_management/annotations/annotation_constants.py +1 -1
- megadetector/data_management/camtrap_dp_to_coco.py +45 -45
- megadetector/data_management/cct_json_utils.py +101 -101
- megadetector/data_management/cct_to_md.py +49 -49
- megadetector/data_management/cct_to_wi.py +33 -33
- megadetector/data_management/coco_to_labelme.py +75 -75
- megadetector/data_management/coco_to_yolo.py +189 -189
- megadetector/data_management/databases/add_width_and_height_to_db.py +3 -2
- megadetector/data_management/databases/combine_coco_camera_traps_files.py +38 -38
- megadetector/data_management/databases/integrity_check_json_db.py +202 -188
- megadetector/data_management/databases/subset_json_db.py +33 -33
- megadetector/data_management/generate_crops_from_cct.py +38 -38
- megadetector/data_management/get_image_sizes.py +54 -49
- megadetector/data_management/labelme_to_coco.py +130 -124
- megadetector/data_management/labelme_to_yolo.py +78 -72
- megadetector/data_management/lila/create_lila_blank_set.py +81 -83
- megadetector/data_management/lila/create_lila_test_set.py +32 -31
- megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
- megadetector/data_management/lila/download_lila_subset.py +21 -24
- megadetector/data_management/lila/generate_lila_per_image_labels.py +91 -91
- megadetector/data_management/lila/get_lila_annotation_counts.py +30 -30
- megadetector/data_management/lila/get_lila_image_counts.py +22 -22
- megadetector/data_management/lila/lila_common.py +70 -70
- megadetector/data_management/lila/test_lila_metadata_urls.py +13 -14
- megadetector/data_management/mewc_to_md.py +339 -340
- megadetector/data_management/ocr_tools.py +258 -252
- megadetector/data_management/read_exif.py +232 -223
- megadetector/data_management/remap_coco_categories.py +26 -26
- megadetector/data_management/remove_exif.py +31 -20
- megadetector/data_management/rename_images.py +187 -187
- megadetector/data_management/resize_coco_dataset.py +41 -41
- megadetector/data_management/speciesnet_to_md.py +41 -41
- megadetector/data_management/wi_download_csv_to_coco.py +55 -55
- megadetector/data_management/yolo_output_to_md_output.py +117 -120
- megadetector/data_management/yolo_to_coco.py +195 -188
- megadetector/detection/change_detection.py +831 -0
- megadetector/detection/process_video.py +341 -338
- megadetector/detection/pytorch_detector.py +308 -266
- megadetector/detection/run_detector.py +186 -166
- megadetector/detection/run_detector_batch.py +366 -364
- megadetector/detection/run_inference_with_yolov5_val.py +328 -325
- megadetector/detection/run_tiled_inference.py +312 -253
- megadetector/detection/tf_detector.py +24 -24
- megadetector/detection/video_utils.py +291 -283
- megadetector/postprocessing/add_max_conf.py +15 -11
- megadetector/postprocessing/categorize_detections_by_size.py +44 -44
- megadetector/postprocessing/classification_postprocessing.py +808 -311
- megadetector/postprocessing/combine_batch_outputs.py +20 -21
- megadetector/postprocessing/compare_batch_results.py +528 -517
- megadetector/postprocessing/convert_output_format.py +97 -97
- megadetector/postprocessing/create_crop_folder.py +220 -147
- megadetector/postprocessing/detector_calibration.py +173 -168
- megadetector/postprocessing/generate_csv_report.py +508 -0
- megadetector/postprocessing/load_api_results.py +25 -22
- megadetector/postprocessing/md_to_coco.py +129 -98
- megadetector/postprocessing/md_to_labelme.py +89 -83
- megadetector/postprocessing/md_to_wi.py +40 -40
- megadetector/postprocessing/merge_detections.py +87 -114
- megadetector/postprocessing/postprocess_batch_results.py +319 -302
- megadetector/postprocessing/remap_detection_categories.py +36 -36
- megadetector/postprocessing/render_detection_confusion_matrix.py +205 -199
- megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
- megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +702 -677
- megadetector/postprocessing/separate_detections_into_folders.py +226 -211
- megadetector/postprocessing/subset_json_detector_output.py +265 -262
- megadetector/postprocessing/top_folders_to_bottom.py +45 -45
- megadetector/postprocessing/validate_batch_results.py +70 -70
- megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +15 -15
- megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +14 -14
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +66 -69
- megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
- megadetector/taxonomy_mapping/simple_image_download.py +8 -8
- megadetector/taxonomy_mapping/species_lookup.py +33 -33
- megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
- megadetector/taxonomy_mapping/taxonomy_graph.py +11 -11
- megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
- megadetector/utils/azure_utils.py +22 -22
- megadetector/utils/ct_utils.py +1019 -200
- megadetector/utils/directory_listing.py +21 -77
- megadetector/utils/gpu_test.py +22 -22
- megadetector/utils/md_tests.py +541 -518
- megadetector/utils/path_utils.py +1511 -406
- megadetector/utils/process_utils.py +41 -41
- megadetector/utils/sas_blob_utils.py +53 -49
- megadetector/utils/split_locations_into_train_val.py +73 -60
- megadetector/utils/string_utils.py +147 -26
- megadetector/utils/url_utils.py +463 -173
- megadetector/utils/wi_utils.py +2629 -2868
- megadetector/utils/write_html_image_list.py +137 -137
- megadetector/visualization/plot_utils.py +21 -21
- megadetector/visualization/render_images_with_thumbnails.py +37 -73
- megadetector/visualization/visualization_utils.py +424 -404
- megadetector/visualization/visualize_db.py +197 -190
- megadetector/visualization/visualize_detector_output.py +126 -98
- {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/METADATA +6 -3
- megadetector-5.0.29.dist-info/RECORD +163 -0
- {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/WHEEL +1 -1
- megadetector/data_management/importers/add_nacti_sizes.py +0 -52
- megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
- megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
- megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
- megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
- megadetector/data_management/importers/awc_to_json.py +0 -191
- megadetector/data_management/importers/bellevue_to_json.py +0 -272
- megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
- megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
- megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
- megadetector/data_management/importers/cct_field_adjustments.py +0 -58
- megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
- megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
- megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
- megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
- megadetector/data_management/importers/ena24_to_json.py +0 -276
- megadetector/data_management/importers/filenames_to_json.py +0 -386
- megadetector/data_management/importers/helena_to_cct.py +0 -283
- megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
- megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
- megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
- megadetector/data_management/importers/jb_csv_to_json.py +0 -150
- megadetector/data_management/importers/mcgill_to_json.py +0 -250
- megadetector/data_management/importers/missouri_to_json.py +0 -490
- megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
- megadetector/data_management/importers/noaa_seals_2019.py +0 -181
- megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
- megadetector/data_management/importers/pc_to_json.py +0 -365
- megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
- megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
- megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
- megadetector/data_management/importers/rspb_to_json.py +0 -356
- megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
- megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
- megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
- megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
- megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
- megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
- megadetector/data_management/importers/sulross_get_exif.py +0 -65
- megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
- megadetector/data_management/importers/ubc_to_json.py +0 -399
- megadetector/data_management/importers/umn_to_json.py +0 -507
- megadetector/data_management/importers/wellington_to_json.py +0 -263
- megadetector/data_management/importers/wi_to_json.py +0 -442
- megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
- megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
- megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
- megadetector-5.0.27.dist-info/RECORD +0 -208
- {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/licenses/LICENSE +0 -0
- {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/top_level.txt +0 -0
|
@@ -2,8 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
read_exif.py
|
|
4
4
|
|
|
5
|
-
Given a folder of images, reads relevant metadata (EXIF/IPTC/XMP) fields from all images,
|
|
6
|
-
and writes them to a .json or .csv file.
|
|
5
|
+
Given a folder of images, reads relevant metadata (EXIF/IPTC/XMP) fields from all images,
|
|
6
|
+
and writes them to a .json or .csv file.
|
|
7
7
|
|
|
8
8
|
This module can use either PIL (which can only reliably read EXIF data) or exiftool (which
|
|
9
9
|
can read everything). The latter approach expects that exiftool is available on the system
|
|
@@ -16,6 +16,9 @@ path. No attempt is made to be consistent in format across the two approaches.
|
|
|
16
16
|
import os
|
|
17
17
|
import subprocess
|
|
18
18
|
import json
|
|
19
|
+
import argparse
|
|
20
|
+
import sys
|
|
21
|
+
|
|
19
22
|
from datetime import datetime
|
|
20
23
|
|
|
21
24
|
from multiprocessing.pool import ThreadPool as ThreadPool
|
|
@@ -23,6 +26,7 @@ from multiprocessing.pool import Pool as Pool
|
|
|
23
26
|
|
|
24
27
|
from tqdm import tqdm
|
|
25
28
|
from PIL import Image, ExifTags
|
|
29
|
+
from functools import partial
|
|
26
30
|
|
|
27
31
|
from megadetector.utils.path_utils import find_images, is_executable
|
|
28
32
|
from megadetector.utils.ct_utils import args_to_object
|
|
@@ -31,6 +35,9 @@ from megadetector.data_management.cct_json_utils import write_object_with_serial
|
|
|
31
35
|
|
|
32
36
|
debug_max_images = None
|
|
33
37
|
|
|
38
|
+
minimal_exif_tags = \
|
|
39
|
+
['DateTime','Model','Make','ExifImageWidth','ExifImageHeight','DateTimeOriginal','Orientation']
|
|
40
|
+
|
|
34
41
|
|
|
35
42
|
#%% Options
|
|
36
43
|
|
|
@@ -38,85 +45,84 @@ class ReadExifOptions:
|
|
|
38
45
|
"""
|
|
39
46
|
Parameters controlling metadata extraction.
|
|
40
47
|
"""
|
|
41
|
-
|
|
48
|
+
|
|
42
49
|
def __init__(self):
|
|
43
|
-
|
|
50
|
+
|
|
44
51
|
#: Enable additional debug console output
|
|
45
52
|
self.verbose = False
|
|
46
|
-
|
|
53
|
+
|
|
47
54
|
#: If this is True and an output file is specified for read_exif_from_folder,
|
|
48
55
|
#: and we encounter a serialization issue, we'll return the results but won't
|
|
49
|
-
#: error.
|
|
56
|
+
#: error.
|
|
50
57
|
self.allow_write_error = False
|
|
51
|
-
|
|
58
|
+
|
|
52
59
|
#: Number of concurrent workers, set to <= 1 to disable parallelization
|
|
53
60
|
self.n_workers = 1
|
|
54
|
-
|
|
61
|
+
|
|
55
62
|
#: Should we use threads (vs. processes) for parallelization?
|
|
56
63
|
#:
|
|
57
64
|
#: Not relevant if n_workers is <= 1.
|
|
58
65
|
self.use_threads = True
|
|
59
|
-
|
|
60
|
-
#: "File" and "ExifTool" are tag types used by ExifTool to report data that
|
|
66
|
+
|
|
67
|
+
#: "File" and "ExifTool" are tag types used by ExifTool to report data that
|
|
61
68
|
#: doesn't come from EXIF, rather from the file (e.g. file size).
|
|
62
69
|
self.tag_types_to_ignore = set(['File','ExifTool'])
|
|
63
|
-
|
|
70
|
+
|
|
64
71
|
#: Include/exclude specific tags (tags_to_include and tags_to_exclude are mutually incompatible)
|
|
65
72
|
#:
|
|
66
73
|
#: A useful set of tags one might want to limit queries for:
|
|
67
74
|
#:
|
|
68
|
-
#: options.tags_to_include =
|
|
69
|
-
#: 'DateTimeOriginal','Orientation']
|
|
75
|
+
#: options.tags_to_include = minimal_exif_tags
|
|
70
76
|
self.tags_to_include = None
|
|
71
|
-
|
|
77
|
+
|
|
72
78
|
#: Include/exclude specific tags (tags_to_include and tags_to_exclude are mutually incompatible)
|
|
73
79
|
self.tags_to_exclude = None
|
|
74
|
-
|
|
80
|
+
|
|
75
81
|
#: The command line to invoke if using exiftool, can be an absolute path to exiftool.exe, or
|
|
76
82
|
#: can be just "exiftool", in which case it should be on your system path.
|
|
77
83
|
self.exiftool_command_name = 'exiftool'
|
|
78
|
-
|
|
84
|
+
|
|
79
85
|
#: How should we handle byte-formatted EXIF tags?
|
|
80
86
|
#:
|
|
81
87
|
#: 'convert_to_string': convert to a Python string
|
|
82
88
|
#: 'delete': don't include at all
|
|
83
89
|
#: 'raw': include as a byte string
|
|
84
90
|
self.byte_handling = 'convert_to_string' # 'convert_to_string','delete','raw'
|
|
85
|
-
|
|
91
|
+
|
|
86
92
|
#: Should we use exiftool or PIL?
|
|
87
93
|
self.processing_library = 'pil' # 'exiftool','pil'
|
|
88
|
-
|
|
94
|
+
|
|
89
95
|
|
|
90
96
|
class ExifResultsToCCTOptions:
|
|
91
97
|
"""
|
|
92
|
-
Options controlling the behavior of exif_results_to_cct() (which reformats the datetime
|
|
98
|
+
Options controlling the behavior of exif_results_to_cct() (which reformats the datetime
|
|
93
99
|
information) extracted by read_exif_from_folder().
|
|
94
100
|
"""
|
|
95
|
-
|
|
101
|
+
|
|
96
102
|
def __init__(self):
|
|
97
|
-
|
|
103
|
+
|
|
98
104
|
#: Timestamps older than this are assumed to be junk; lots of cameras use a
|
|
99
105
|
#: default time in 2000.
|
|
100
106
|
self.min_valid_timestamp_year = 2001
|
|
101
|
-
|
|
107
|
+
|
|
102
108
|
#: The EXIF tag from which to pull datetime information
|
|
103
109
|
self.exif_datetime_tag = 'DateTimeOriginal'
|
|
104
|
-
|
|
110
|
+
|
|
105
111
|
#: Function for extracting location information, should take a string
|
|
106
112
|
#: and return a string. Defaults to ct_utils.image_file_to_camera_folder. If
|
|
107
113
|
#: this is None, location is written as "unknown".
|
|
108
114
|
self.filename_to_location_function = image_file_to_camera_folder
|
|
109
|
-
|
|
115
|
+
|
|
110
116
|
|
|
111
117
|
#%% Functions
|
|
112
118
|
|
|
113
119
|
def _get_exif_ifd(exif):
|
|
114
120
|
"""
|
|
115
121
|
Read EXIF data by finding the EXIF offset and reading tags directly
|
|
116
|
-
|
|
122
|
+
|
|
117
123
|
https://github.com/python-pillow/Pillow/issues/5863
|
|
118
124
|
"""
|
|
119
|
-
|
|
125
|
+
|
|
120
126
|
# Find the offset for all the EXIF information
|
|
121
127
|
for key, value in ExifTags.TAGS.items():
|
|
122
128
|
if value == "ExifOffset":
|
|
@@ -130,19 +136,19 @@ def _get_exif_ifd(exif):
|
|
|
130
136
|
|
|
131
137
|
def has_gps_info(im):
|
|
132
138
|
"""
|
|
133
|
-
Given a filename, PIL image, dict of EXIF tags, or dict containing an 'exif_tags' field,
|
|
139
|
+
Given a filename, PIL image, dict of EXIF tags, or dict containing an 'exif_tags' field,
|
|
134
140
|
determine whether GPS location information is present in this image. Does not retrieve
|
|
135
141
|
location info, currently only used to determine whether it's present.
|
|
136
|
-
|
|
142
|
+
|
|
137
143
|
Args:
|
|
138
144
|
im (str, PIL.Image.Image, dict): image for which we should determine GPS metadata
|
|
139
145
|
presence
|
|
140
|
-
|
|
146
|
+
|
|
141
147
|
Returns:
|
|
142
148
|
bool: whether GPS metadata is present, or None if we failed to read EXIF data from
|
|
143
149
|
a file.
|
|
144
150
|
"""
|
|
145
|
-
|
|
151
|
+
|
|
146
152
|
if isinstance(im,str) or isinstance(im,Image.Image):
|
|
147
153
|
exif_tags = read_pil_exif(im)
|
|
148
154
|
if exif_tags is None:
|
|
@@ -151,57 +157,57 @@ def has_gps_info(im):
|
|
|
151
157
|
else:
|
|
152
158
|
assert isinstance(im,dict)
|
|
153
159
|
exif_tags = im
|
|
154
|
-
|
|
160
|
+
|
|
155
161
|
if 'exif_tags' in exif_tags:
|
|
156
162
|
exif_tags = exif_tags['exif_tags']
|
|
157
163
|
if exif_tags is None:
|
|
158
164
|
return None
|
|
159
|
-
|
|
165
|
+
|
|
160
166
|
if 'GPSInfo' in exif_tags and \
|
|
161
167
|
exif_tags['GPSInfo'] is not None and \
|
|
162
168
|
isinstance(exif_tags['GPSInfo'],dict):
|
|
163
|
-
|
|
169
|
+
|
|
164
170
|
# Don't indicate that GPS data is present if only GPS version info is present
|
|
165
171
|
if ('GPSLongitude' in exif_tags['GPSInfo']) or ('GPSLatitude' in exif_tags['GPSInfo']):
|
|
166
172
|
return True
|
|
167
173
|
return False
|
|
168
|
-
|
|
174
|
+
|
|
169
175
|
return False
|
|
170
|
-
|
|
171
|
-
# ...def has_gps_info(...)
|
|
176
|
+
|
|
177
|
+
# ...def has_gps_info(...)
|
|
172
178
|
|
|
173
179
|
|
|
174
180
|
def read_pil_exif(im,options=None):
|
|
175
181
|
"""
|
|
176
182
|
Read all the EXIF data we know how to read from an image, using PIL. This is primarily
|
|
177
|
-
an internal function; the main entry point for single-image EXIF information is
|
|
183
|
+
an internal function; the main entry point for single-image EXIF information is
|
|
178
184
|
read_exif_tags_for_image().
|
|
179
|
-
|
|
185
|
+
|
|
180
186
|
Args:
|
|
181
|
-
im (str or PIL.Image.Image): image (as a filename or an Image object) from which
|
|
187
|
+
im (str or PIL.Image.Image): image (as a filename or an Image object) from which
|
|
182
188
|
we should read EXIF data.
|
|
183
|
-
|
|
189
|
+
|
|
184
190
|
Returns:
|
|
185
191
|
dict: a dictionary mapping EXIF tag names to their values
|
|
186
192
|
"""
|
|
187
|
-
|
|
193
|
+
|
|
188
194
|
if options is None:
|
|
189
195
|
options = ReadExifOptions()
|
|
190
|
-
|
|
196
|
+
|
|
191
197
|
image_name = '[image]'
|
|
192
198
|
if isinstance(im,str):
|
|
193
199
|
image_name = im
|
|
194
200
|
im = Image.open(im)
|
|
195
|
-
|
|
201
|
+
|
|
196
202
|
exif_tags = {}
|
|
197
203
|
try:
|
|
198
204
|
exif_info = im.getexif()
|
|
199
205
|
except Exception:
|
|
200
206
|
exif_info = None
|
|
201
|
-
|
|
207
|
+
|
|
202
208
|
if exif_info is None:
|
|
203
209
|
return exif_tags
|
|
204
|
-
|
|
210
|
+
|
|
205
211
|
for k, v in exif_info.items():
|
|
206
212
|
assert isinstance(k,str) or isinstance(k,int), \
|
|
207
213
|
'Invalid EXIF key {}'.format(str(k))
|
|
@@ -210,9 +216,9 @@ def read_pil_exif(im,options=None):
|
|
|
210
216
|
else:
|
|
211
217
|
# print('Warning: unrecognized EXIF tag: {}'.format(k))
|
|
212
218
|
exif_tags[k] = str(v)
|
|
213
|
-
|
|
219
|
+
|
|
214
220
|
exif_ifd_tags = _get_exif_ifd(exif_info)
|
|
215
|
-
|
|
221
|
+
|
|
216
222
|
for k in exif_ifd_tags.keys():
|
|
217
223
|
v = exif_ifd_tags[k]
|
|
218
224
|
if k in exif_tags:
|
|
@@ -221,16 +227,16 @@ def read_pil_exif(im,options=None):
|
|
|
221
227
|
k,image_name,exif_tags[k],v))
|
|
222
228
|
else:
|
|
223
229
|
exif_tags[k] = v
|
|
224
|
-
|
|
230
|
+
|
|
225
231
|
exif_tag_names = list(exif_tags.keys())
|
|
226
|
-
|
|
232
|
+
|
|
227
233
|
# Type conversion and cleanup
|
|
228
|
-
#
|
|
234
|
+
#
|
|
229
235
|
# Most quirky types will get serialized to string when we write to .json.
|
|
230
236
|
for k in exif_tag_names:
|
|
231
|
-
|
|
237
|
+
|
|
232
238
|
if isinstance(exif_tags[k],bytes):
|
|
233
|
-
|
|
239
|
+
|
|
234
240
|
if options.byte_handling == 'delete':
|
|
235
241
|
del exif_tags[k]
|
|
236
242
|
elif options.byte_handling == 'raw':
|
|
@@ -238,24 +244,24 @@ def read_pil_exif(im,options=None):
|
|
|
238
244
|
else:
|
|
239
245
|
assert options.byte_handling == 'convert_to_string'
|
|
240
246
|
exif_tags[k] = str(exif_tags[k])
|
|
241
|
-
|
|
247
|
+
|
|
242
248
|
elif isinstance(exif_tags[k],str):
|
|
243
|
-
|
|
249
|
+
|
|
244
250
|
exif_tags[k] = exif_tags[k].strip()
|
|
245
|
-
|
|
246
|
-
# Special case for GPS info... I could decode other encoded tags, but GPS info is
|
|
251
|
+
|
|
252
|
+
# Special case for GPS info... I could decode other encoded tags, but GPS info is
|
|
247
253
|
# particularly important, so I'm only doing that for now.
|
|
248
254
|
if 'GPSInfo' in exif_tags:
|
|
249
|
-
|
|
255
|
+
|
|
250
256
|
try:
|
|
251
|
-
|
|
257
|
+
|
|
252
258
|
# Find the tag number for GPS info, in practice should always be 34853
|
|
253
|
-
|
|
254
|
-
assert
|
|
255
|
-
|
|
259
|
+
gpsinfo_tag = next(tag for tag, name in ExifTags.TAGS.items() if name == "GPSInfo")
|
|
260
|
+
assert gpsinfo_tag == 34853
|
|
261
|
+
|
|
256
262
|
# These are integer keys, e.g. {7: (14.0, 27.0, 7.24)}
|
|
257
|
-
gps_info_raw = exif_info.get_ifd(
|
|
258
|
-
|
|
263
|
+
gps_info_raw = exif_info.get_ifd(gpsinfo_tag)
|
|
264
|
+
|
|
259
265
|
# Convert to strings, e.g. 'GPSTimeStamp'
|
|
260
266
|
gps_info = {}
|
|
261
267
|
for int_tag,v in enumerate(gps_info_raw.keys()):
|
|
@@ -264,15 +270,15 @@ def read_pil_exif(im,options=None):
|
|
|
264
270
|
gps_info[ExifTags.GPSTAGS[int_tag]] = v
|
|
265
271
|
else:
|
|
266
272
|
gps_info[int_tag] = v
|
|
267
|
-
|
|
273
|
+
|
|
268
274
|
exif_tags['GPSInfo'] = gps_info
|
|
269
|
-
|
|
275
|
+
|
|
270
276
|
except Exception as e:
|
|
271
277
|
if options.verbose:
|
|
272
278
|
print('Warning: error reading GPS info: {}'.format(str(e)))
|
|
273
|
-
|
|
279
|
+
|
|
274
280
|
# ...if we think there might be GPS tags in this image
|
|
275
|
-
|
|
281
|
+
|
|
276
282
|
return exif_tags
|
|
277
283
|
|
|
278
284
|
# ...read_pil_exif()
|
|
@@ -283,26 +289,26 @@ def format_datetime_as_exif_datetime_string(dt):
|
|
|
283
289
|
Returns a Python datetime object rendered using the standard EXIF datetime
|
|
284
290
|
string format ('%Y:%m:%d %H:%M:%S')
|
|
285
291
|
"""
|
|
286
|
-
|
|
292
|
+
|
|
287
293
|
return datetime.strftime(dt, '%Y:%m:%d %H:%M:%S')
|
|
288
|
-
|
|
294
|
+
|
|
289
295
|
|
|
290
296
|
def parse_exif_datetime_string(s,verbose=False):
|
|
291
297
|
""""
|
|
292
|
-
Exif datetimes are strings, but in a standard format:
|
|
293
|
-
|
|
298
|
+
Exif datetimes are strings, but in a standard format:
|
|
299
|
+
|
|
294
300
|
%Y:%m:%d %H:%M:%S
|
|
295
|
-
|
|
301
|
+
|
|
296
302
|
Parses one of those strings into a Python datetime object.
|
|
297
|
-
|
|
303
|
+
|
|
298
304
|
Args:
|
|
299
305
|
s (str): datetime string to parse, should be in standard EXIF datetime format
|
|
300
306
|
verbose (bool, optional): enable additional debug output
|
|
301
|
-
|
|
307
|
+
|
|
302
308
|
Returns:
|
|
303
309
|
datetime: the datetime object created from [s]
|
|
304
310
|
"""
|
|
305
|
-
|
|
311
|
+
|
|
306
312
|
dt = None
|
|
307
313
|
try:
|
|
308
314
|
dt = datetime.strptime(s, '%Y:%m:%d %H:%M:%S')
|
|
@@ -317,7 +323,7 @@ def _filter_tags(tags,options):
|
|
|
317
323
|
Internal function used to include/exclude specific tags from the exif_tags
|
|
318
324
|
dict.
|
|
319
325
|
"""
|
|
320
|
-
|
|
326
|
+
|
|
321
327
|
if options is None:
|
|
322
328
|
return tags
|
|
323
329
|
if options.tags_to_include is None and options.tags_to_exclude is None:
|
|
@@ -341,23 +347,23 @@ def _filter_tags(tags,options):
|
|
|
341
347
|
def read_exif_tags_for_image(file_path,options=None):
|
|
342
348
|
"""
|
|
343
349
|
Get relevant fields from EXIF data for an image
|
|
344
|
-
|
|
350
|
+
|
|
345
351
|
Returns:
|
|
346
|
-
dict: a dict with fields 'status' (str) and 'tags'. The exact format of 'tags' depends on
|
|
352
|
+
dict: a dict with fields 'status' (str) and 'tags'. The exact format of 'tags' depends on
|
|
347
353
|
options (ReadExifOptions, optional): parameters controlling metadata extraction
|
|
348
354
|
options.processing_library:
|
|
349
|
-
|
|
355
|
+
|
|
350
356
|
- For exiftool, 'tags' is a list of lists, where each element is (type/tag/value)
|
|
351
357
|
- For PIL, 'tags' is a dict (str:str)
|
|
352
358
|
"""
|
|
353
|
-
|
|
359
|
+
|
|
354
360
|
if options is None:
|
|
355
361
|
options = ReadExifOptions()
|
|
356
|
-
|
|
362
|
+
|
|
357
363
|
result = {'status':'unknown','tags':[]}
|
|
358
|
-
|
|
364
|
+
|
|
359
365
|
if options.processing_library == 'pil':
|
|
360
|
-
|
|
366
|
+
|
|
361
367
|
try:
|
|
362
368
|
exif_tags = read_pil_exif(file_path,options)
|
|
363
369
|
|
|
@@ -367,18 +373,18 @@ def read_exif_tags_for_image(file_path,options=None):
|
|
|
367
373
|
file_path,str(e)))
|
|
368
374
|
result['status'] = 'read_failure'
|
|
369
375
|
result['error'] = str(e)
|
|
370
|
-
|
|
376
|
+
|
|
371
377
|
if result['status'] == 'unknown':
|
|
372
|
-
if exif_tags is None:
|
|
378
|
+
if exif_tags is None:
|
|
373
379
|
result['status'] = 'empty_read'
|
|
374
380
|
else:
|
|
375
381
|
result['status'] = 'success'
|
|
376
382
|
result['tags'] = _filter_tags(exif_tags,options)
|
|
377
|
-
|
|
383
|
+
|
|
378
384
|
return result
|
|
379
|
-
|
|
385
|
+
|
|
380
386
|
elif options.processing_library == 'exiftool':
|
|
381
|
-
|
|
387
|
+
|
|
382
388
|
# -G means "Print group name for each tag", e.g. print:
|
|
383
389
|
#
|
|
384
390
|
# [File] Bits Per Sample : 8
|
|
@@ -388,95 +394,95 @@ def read_exif_tags_for_image(file_path,options=None):
|
|
|
388
394
|
# Bits Per Sample : 8
|
|
389
395
|
proc = subprocess.Popen([options.exiftool_command_name, '-G', file_path],
|
|
390
396
|
stdout=subprocess.PIPE, encoding='utf8')
|
|
391
|
-
|
|
392
|
-
exif_lines = proc.stdout.readlines()
|
|
397
|
+
|
|
398
|
+
exif_lines = proc.stdout.readlines()
|
|
393
399
|
exif_lines = [s.strip() for s in exif_lines]
|
|
394
400
|
if ( (exif_lines is None) or (len(exif_lines) == 0) or not \
|
|
395
401
|
any([s.lower().startswith('[exif]') for s in exif_lines])):
|
|
396
402
|
result['status'] = 'failure'
|
|
397
403
|
return result
|
|
398
|
-
|
|
404
|
+
|
|
399
405
|
# A list of three-element lists (type/tag/value)
|
|
400
406
|
exif_tags = []
|
|
401
|
-
|
|
407
|
+
|
|
402
408
|
# line_raw = exif_lines[0]
|
|
403
409
|
for line_raw in exif_lines:
|
|
404
|
-
|
|
410
|
+
|
|
405
411
|
# A typical line:
|
|
406
412
|
#
|
|
407
413
|
# [ExifTool] ExifTool Version Number : 12.13
|
|
408
|
-
|
|
414
|
+
|
|
409
415
|
line = line_raw.strip()
|
|
410
|
-
|
|
416
|
+
|
|
411
417
|
# Split on the first occurrence of ":"
|
|
412
418
|
tokens = line.split(':',1)
|
|
413
419
|
assert(len(tokens) == 2), 'EXIF tokenization failure ({} tokens, expected 2)'.format(
|
|
414
420
|
len(tokens))
|
|
415
|
-
|
|
416
|
-
field_value = tokens[1].strip()
|
|
417
|
-
|
|
418
|
-
field_name_type = tokens[0].strip()
|
|
421
|
+
|
|
422
|
+
field_value = tokens[1].strip()
|
|
423
|
+
|
|
424
|
+
field_name_type = tokens[0].strip()
|
|
419
425
|
field_name_type_tokens = field_name_type.split(None,1)
|
|
420
426
|
assert len(field_name_type_tokens) == 2, 'EXIF tokenization failure'
|
|
421
|
-
|
|
427
|
+
|
|
422
428
|
field_type = field_name_type_tokens[0].strip()
|
|
423
429
|
assert field_type.startswith('[') and field_type.endswith(']'), \
|
|
424
430
|
'Invalid EXIF field {}'.format(field_type)
|
|
425
431
|
field_type = field_type[1:-1]
|
|
426
|
-
|
|
432
|
+
|
|
427
433
|
if field_type in options.tag_types_to_ignore:
|
|
428
434
|
if options.verbose:
|
|
429
435
|
print('Ignoring tag with type {}'.format(field_type))
|
|
430
|
-
continue
|
|
431
|
-
|
|
436
|
+
continue
|
|
437
|
+
|
|
432
438
|
field_name = field_name_type_tokens[1].strip()
|
|
433
439
|
if options.tags_to_exclude is not None and field_name in options.tags_to_exclude:
|
|
434
440
|
continue
|
|
435
441
|
if options.tags_to_include is not None and field_name not in options.tags_to_include:
|
|
436
442
|
continue
|
|
437
443
|
tag = [field_type,field_name,field_value]
|
|
438
|
-
|
|
444
|
+
|
|
439
445
|
exif_tags.append(tag)
|
|
440
|
-
|
|
446
|
+
|
|
441
447
|
# ...for each output line
|
|
442
|
-
|
|
448
|
+
|
|
443
449
|
result['status'] = 'success'
|
|
444
450
|
result['tags'] = exif_tags
|
|
445
451
|
return result
|
|
446
|
-
|
|
452
|
+
|
|
447
453
|
else:
|
|
448
|
-
|
|
454
|
+
|
|
449
455
|
raise ValueError('Unknown processing library {}'.format(
|
|
450
456
|
options.processing_library))
|
|
451
457
|
|
|
452
458
|
# ...which processing library are we using?
|
|
453
|
-
|
|
459
|
+
|
|
454
460
|
# ...read_exif_tags_for_image()
|
|
455
461
|
|
|
456
462
|
|
|
457
463
|
def _populate_exif_data(im, image_base, options=None):
|
|
458
464
|
"""
|
|
459
465
|
Populate EXIF data into the 'exif_tags' field in the image object [im].
|
|
460
|
-
|
|
466
|
+
|
|
461
467
|
im['file_name'] should be prepopulated, relative to image_base.
|
|
462
|
-
|
|
468
|
+
|
|
463
469
|
Returns a modified version of [im], also modifies [im] in place.
|
|
464
470
|
"""
|
|
465
|
-
|
|
471
|
+
|
|
466
472
|
if options is None:
|
|
467
473
|
options = ReadExifOptions()
|
|
468
474
|
|
|
469
475
|
fn = im['file_name']
|
|
470
476
|
if options.verbose:
|
|
471
477
|
print('Processing {}'.format(fn))
|
|
472
|
-
|
|
478
|
+
|
|
473
479
|
try:
|
|
474
|
-
|
|
480
|
+
|
|
475
481
|
file_path = os.path.join(image_base,fn)
|
|
476
482
|
assert os.path.isfile(file_path), 'Could not find file {}'.format(file_path)
|
|
477
483
|
result = read_exif_tags_for_image(file_path,options)
|
|
478
484
|
if result['status'] == 'success':
|
|
479
|
-
exif_tags = result['tags']
|
|
485
|
+
exif_tags = result['tags']
|
|
480
486
|
im['exif_tags'] = exif_tags
|
|
481
487
|
else:
|
|
482
488
|
im['exif_tags'] = None
|
|
@@ -485,15 +491,15 @@ def _populate_exif_data(im, image_base, options=None):
|
|
|
485
491
|
im['error'] = result['error']
|
|
486
492
|
if options.verbose:
|
|
487
493
|
print('Error reading EXIF data for {}'.format(file_path))
|
|
488
|
-
|
|
494
|
+
|
|
489
495
|
except Exception as e:
|
|
490
|
-
|
|
496
|
+
|
|
491
497
|
s = 'Error on {}: {}'.format(fn,str(e))
|
|
492
498
|
print(s)
|
|
493
499
|
im['error'] = s
|
|
494
500
|
im['status'] = 'read failure'
|
|
495
501
|
im['exif_tags'] = None
|
|
496
|
-
|
|
502
|
+
|
|
497
503
|
return im
|
|
498
504
|
|
|
499
505
|
# ..._populate_exif_data()
|
|
@@ -501,67 +507,72 @@ def _populate_exif_data(im, image_base, options=None):
|
|
|
501
507
|
|
|
502
508
|
def _create_image_objects(image_files,recursive=True):
|
|
503
509
|
"""
|
|
504
|
-
Create empty image objects for every image in [image_files], which can be a
|
|
505
|
-
list of relative paths (which will get stored without processing, so the base
|
|
510
|
+
Create empty image objects for every image in [image_files], which can be a
|
|
511
|
+
list of relative paths (which will get stored without processing, so the base
|
|
506
512
|
path doesn't matter here), or a folder name.
|
|
507
|
-
|
|
513
|
+
|
|
508
514
|
Returns a list of dicts with field 'file_name' (a relative path).
|
|
509
|
-
|
|
515
|
+
|
|
510
516
|
"recursive" is ignored if "image_files" is a list.
|
|
511
517
|
"""
|
|
512
|
-
|
|
518
|
+
|
|
513
519
|
# Enumerate *relative* paths
|
|
514
|
-
if isinstance(image_files,str):
|
|
520
|
+
if isinstance(image_files,str):
|
|
515
521
|
print('Enumerating image files in {}'.format(image_files))
|
|
516
522
|
assert os.path.isdir(image_files), 'Invalid image folder {}'.format(image_files)
|
|
517
523
|
image_files = find_images(image_files,
|
|
518
524
|
recursive=recursive,
|
|
519
525
|
return_relative_paths=True,
|
|
520
526
|
convert_slashes=True)
|
|
521
|
-
|
|
527
|
+
|
|
522
528
|
images = []
|
|
523
529
|
for fn in image_files:
|
|
524
530
|
im = {}
|
|
525
531
|
im['file_name'] = fn
|
|
526
532
|
images.append(im)
|
|
527
|
-
|
|
533
|
+
|
|
528
534
|
if debug_max_images is not None:
|
|
529
535
|
print('Trimming input list to {} images'.format(debug_max_images))
|
|
530
536
|
images = images[0:debug_max_images]
|
|
531
|
-
|
|
537
|
+
|
|
532
538
|
return images
|
|
533
539
|
|
|
534
540
|
|
|
535
541
|
def _populate_exif_for_images(image_base,images,options=None):
|
|
536
542
|
"""
|
|
537
|
-
Main worker loop: read EXIF data for each image object in [images] and
|
|
543
|
+
Main worker loop: read EXIF data for each image object in [images] and
|
|
538
544
|
populate the image objects in place.
|
|
539
|
-
|
|
545
|
+
|
|
540
546
|
'images' should be a list of dicts with the field 'file_name' containing
|
|
541
|
-
a relative path (relative to 'image_base').
|
|
547
|
+
a relative path (relative to 'image_base').
|
|
542
548
|
"""
|
|
543
|
-
|
|
549
|
+
|
|
544
550
|
if options is None:
|
|
545
551
|
options = ReadExifOptions()
|
|
546
552
|
|
|
547
553
|
if options.n_workers == 1:
|
|
548
|
-
|
|
554
|
+
|
|
549
555
|
results = []
|
|
550
556
|
for im in tqdm(images):
|
|
551
557
|
results.append(_populate_exif_data(im,image_base,options))
|
|
552
|
-
|
|
558
|
+
|
|
553
559
|
else:
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
560
|
+
|
|
561
|
+
pool = None
|
|
562
|
+
try:
|
|
563
|
+
if options.use_threads:
|
|
564
|
+
print('Starting parallel thread pool with {} workers'.format(options.n_workers))
|
|
565
|
+
pool = ThreadPool(options.n_workers)
|
|
566
|
+
else:
|
|
567
|
+
print('Starting parallel process pool with {} workers'.format(options.n_workers))
|
|
568
|
+
pool = Pool(options.n_workers)
|
|
569
|
+
|
|
570
|
+
results = list(tqdm(pool.imap(partial(_populate_exif_data,image_base=image_base,
|
|
571
|
+
options=options),images),total=len(images)))
|
|
572
|
+
finally:
|
|
573
|
+
pool.close()
|
|
574
|
+
pool.join()
|
|
575
|
+
print("Pool closed and joined for EXIF extraction")
|
|
565
576
|
|
|
566
577
|
return results
|
|
567
578
|
|
|
@@ -569,23 +580,23 @@ def _populate_exif_for_images(image_base,images,options=None):
|
|
|
569
580
|
def _write_exif_results(results,output_file):
|
|
570
581
|
"""
|
|
571
582
|
Write EXIF information to [output_file].
|
|
572
|
-
|
|
583
|
+
|
|
573
584
|
'results' is a list of dicts with fields 'exif_tags' and 'file_name'.
|
|
574
585
|
|
|
575
|
-
Writes to .csv or .json depending on the extension of 'output_file'.
|
|
586
|
+
Writes to .csv or .json depending on the extension of 'output_file'.
|
|
576
587
|
"""
|
|
577
|
-
|
|
588
|
+
|
|
578
589
|
if output_file.endswith('.json'):
|
|
579
|
-
|
|
590
|
+
|
|
580
591
|
with open(output_file,'w') as f:
|
|
581
592
|
json.dump(results,f,indent=1,default=str)
|
|
582
|
-
|
|
593
|
+
|
|
583
594
|
elif output_file.endswith('.csv'):
|
|
584
|
-
|
|
595
|
+
|
|
585
596
|
# Find all EXIF tags that exist in any image
|
|
586
597
|
all_keys = set()
|
|
587
598
|
for im in results:
|
|
588
|
-
|
|
599
|
+
|
|
589
600
|
keys_this_image = set()
|
|
590
601
|
exif_tags = im['exif_tags']
|
|
591
602
|
file_name = im['file_name']
|
|
@@ -595,51 +606,51 @@ def _write_exif_results(results,output_file):
|
|
|
595
606
|
'Error: tag {} appears twice in image {}'.format(
|
|
596
607
|
tag_name,file_name)
|
|
597
608
|
all_keys.add(tag_name)
|
|
598
|
-
|
|
609
|
+
|
|
599
610
|
# ...for each tag in this image
|
|
600
|
-
|
|
611
|
+
|
|
601
612
|
# ...for each image
|
|
602
|
-
|
|
613
|
+
|
|
603
614
|
all_keys = sorted(list(all_keys))
|
|
604
|
-
|
|
615
|
+
|
|
605
616
|
header = ['File Name']
|
|
606
617
|
header.extend(all_keys)
|
|
607
|
-
|
|
618
|
+
|
|
608
619
|
import csv
|
|
609
620
|
with open(output_file,'w') as csvfile:
|
|
610
|
-
|
|
621
|
+
|
|
611
622
|
writer = csv.writer(csvfile)
|
|
612
|
-
|
|
623
|
+
|
|
613
624
|
# Write header
|
|
614
625
|
writer.writerow(header)
|
|
615
|
-
|
|
626
|
+
|
|
616
627
|
for im in results:
|
|
617
|
-
|
|
628
|
+
|
|
618
629
|
row = [im['file_name']]
|
|
619
630
|
kvp_this_image = {tag[1]:tag[2] for tag in im['exif_tags']}
|
|
620
|
-
|
|
631
|
+
|
|
621
632
|
for i_key,key in enumerate(all_keys):
|
|
622
633
|
value = ''
|
|
623
634
|
if key in kvp_this_image:
|
|
624
635
|
value = kvp_this_image[key]
|
|
625
|
-
row.append(value)
|
|
636
|
+
row.append(value)
|
|
626
637
|
# ...for each key that *might* be present in this image
|
|
627
|
-
|
|
638
|
+
|
|
628
639
|
assert len(row) == len(header)
|
|
629
|
-
|
|
640
|
+
|
|
630
641
|
writer.writerow(row)
|
|
631
|
-
|
|
642
|
+
|
|
632
643
|
# ...for each image
|
|
633
|
-
|
|
644
|
+
|
|
634
645
|
# ...with open()
|
|
635
|
-
|
|
646
|
+
|
|
636
647
|
else:
|
|
637
|
-
|
|
648
|
+
|
|
638
649
|
raise ValueError('Could not determine output type from file {}'.format(
|
|
639
650
|
output_file))
|
|
640
|
-
|
|
651
|
+
|
|
641
652
|
# ...if we're writing to .json/.csv
|
|
642
|
-
|
|
653
|
+
|
|
643
654
|
print('Wrote results to {}'.format(output_file))
|
|
644
655
|
|
|
645
656
|
# ..._write_exif_results(...)
|
|
@@ -648,7 +659,7 @@ def _write_exif_results(results,output_file):
|
|
|
648
659
|
def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=None,recursive=True):
|
|
649
660
|
"""
|
|
650
661
|
Read EXIF data for a folder of images.
|
|
651
|
-
|
|
662
|
+
|
|
652
663
|
Args:
|
|
653
664
|
input_folder (str): folder to process; if this is None, [filenames] should be a list of absolute
|
|
654
665
|
paths
|
|
@@ -660,22 +671,22 @@ def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=N
|
|
|
660
671
|
recursive (bool, optional): whether to recurse into [input_folder], not relevant if [input_folder]
|
|
661
672
|
is None.
|
|
662
673
|
verbose (bool, optional): enable additional debug output
|
|
663
|
-
|
|
674
|
+
|
|
664
675
|
Returns:
|
|
665
676
|
list: list of dicts, each of which contains EXIF information for one images. Fields include at least:
|
|
666
677
|
* 'file_name': the relative path to the image
|
|
667
678
|
* 'exif_tags': a dict of EXIF tags whose exact format depends on [options.processing_library].
|
|
668
679
|
"""
|
|
669
|
-
|
|
680
|
+
|
|
670
681
|
if options is None:
|
|
671
682
|
options = ReadExifOptions()
|
|
672
|
-
|
|
683
|
+
|
|
673
684
|
# Validate options
|
|
674
685
|
if options.tags_to_include is not None:
|
|
675
686
|
assert options.tags_to_exclude is None, "tags_to_include and tags_to_exclude are incompatible"
|
|
676
687
|
if options.tags_to_exclude is not None:
|
|
677
|
-
assert options.tags_to_include is None, "tags_to_include and tags_to_exclude are incompatible"
|
|
678
|
-
|
|
688
|
+
assert options.tags_to_include is None, "tags_to_include and tags_to_exclude are incompatible"
|
|
689
|
+
|
|
679
690
|
if input_folder is None:
|
|
680
691
|
input_folder = ''
|
|
681
692
|
if len(input_folder) > 0:
|
|
@@ -684,12 +695,12 @@ def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=N
|
|
|
684
695
|
|
|
685
696
|
assert (len(input_folder) > 0) or (filenames is not None), \
|
|
686
697
|
'Must specify either a folder or a list of files'
|
|
687
|
-
|
|
688
|
-
if output_file is not None:
|
|
689
|
-
|
|
698
|
+
|
|
699
|
+
if output_file is not None:
|
|
700
|
+
|
|
690
701
|
assert output_file.lower().endswith('.json') or output_file.lower().endswith('.csv'), \
|
|
691
702
|
'I only know how to write results to .json or .csv'
|
|
692
|
-
|
|
703
|
+
|
|
693
704
|
try:
|
|
694
705
|
with open(output_file, 'a') as f:
|
|
695
706
|
if not f.writable():
|
|
@@ -697,7 +708,7 @@ def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=N
|
|
|
697
708
|
except Exception:
|
|
698
709
|
print('Could not write to file {}'.format(output_file))
|
|
699
710
|
raise
|
|
700
|
-
|
|
711
|
+
|
|
701
712
|
if options.processing_library == 'exif':
|
|
702
713
|
assert is_executable(options.exiftool_command_name), 'exiftool not available'
|
|
703
714
|
|
|
@@ -706,9 +717,9 @@ def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=N
|
|
|
706
717
|
else:
|
|
707
718
|
assert isinstance(filenames,list)
|
|
708
719
|
images = _create_image_objects(filenames)
|
|
709
|
-
|
|
720
|
+
|
|
710
721
|
results = _populate_exif_for_images(input_folder,images,options)
|
|
711
|
-
|
|
722
|
+
|
|
712
723
|
if output_file is not None:
|
|
713
724
|
try:
|
|
714
725
|
_write_exif_results(results,output_file)
|
|
@@ -716,8 +727,8 @@ def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=N
|
|
|
716
727
|
if not options.allow_write_error:
|
|
717
728
|
raise
|
|
718
729
|
else:
|
|
719
|
-
print('Warning: error serializing EXIF data: {}'.format(str(e)))
|
|
720
|
-
|
|
730
|
+
print('Warning: error serializing EXIF data: {}'.format(str(e)))
|
|
731
|
+
|
|
721
732
|
return results
|
|
722
733
|
|
|
723
734
|
# ...read_exif_from_folder(...)
|
|
@@ -726,54 +737,54 @@ def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=N
|
|
|
726
737
|
def exif_results_to_cct(exif_results,cct_output_file=None,options=None):
|
|
727
738
|
"""
|
|
728
739
|
Given the EXIF results for a folder of images read via read_exif_from_folder,
|
|
729
|
-
create a COCO Camera Traps .json file that has no annotations, but
|
|
740
|
+
create a COCO Camera Traps .json file that has no annotations, but
|
|
730
741
|
attaches image filenames to locations and datetimes.
|
|
731
|
-
|
|
742
|
+
|
|
732
743
|
Args:
|
|
733
744
|
exif_results (str or list): the filename (or loaded list) containing the results
|
|
734
745
|
from read_exif_from_folder
|
|
735
|
-
cct_output_file (str,optional): the filename to which we should write
|
|
746
|
+
cct_output_file (str,optional): the filename to which we should write
|
|
736
747
|
COCO-Camera-Traps-formatted data
|
|
737
748
|
options (ExifResultsToCCTOptions, optional): options guiding the generation
|
|
738
749
|
of the CCT file, particularly location mapping
|
|
739
|
-
|
|
750
|
+
|
|
740
751
|
Returns:
|
|
741
752
|
dict: a COCO Camera Traps dict (with no annotations).
|
|
742
753
|
"""
|
|
743
|
-
|
|
754
|
+
|
|
744
755
|
if options is None:
|
|
745
756
|
options = ExifResultsToCCTOptions()
|
|
746
|
-
|
|
757
|
+
|
|
747
758
|
if isinstance(exif_results,str):
|
|
748
759
|
print('Reading EXIF results from {}'.format(exif_results))
|
|
749
760
|
with open(exif_results,'r') as f:
|
|
750
761
|
exif_results = json.load(f)
|
|
751
762
|
else:
|
|
752
763
|
assert isinstance(exif_results,list)
|
|
753
|
-
|
|
764
|
+
|
|
754
765
|
now = datetime.now()
|
|
755
766
|
|
|
756
767
|
image_info = []
|
|
757
768
|
|
|
758
769
|
images_without_datetime = []
|
|
759
770
|
images_with_invalid_datetime = []
|
|
760
|
-
|
|
771
|
+
|
|
761
772
|
# exif_result = exif_results[0]
|
|
762
773
|
for exif_result in tqdm(exif_results):
|
|
763
|
-
|
|
774
|
+
|
|
764
775
|
im = {}
|
|
765
|
-
|
|
776
|
+
|
|
766
777
|
# By default we assume that each leaf-node folder is a location
|
|
767
778
|
if options.filename_to_location_function is None:
|
|
768
779
|
im['location'] = 'unknown'
|
|
769
780
|
else:
|
|
770
|
-
im['location'] = options.filename_to_location_function(exif_result['file_name'])
|
|
771
|
-
|
|
781
|
+
im['location'] = options.filename_to_location_function(exif_result['file_name'])
|
|
782
|
+
|
|
772
783
|
im['file_name'] = exif_result['file_name']
|
|
773
784
|
im['id'] = im['file_name']
|
|
774
|
-
|
|
785
|
+
|
|
775
786
|
if ('exif_tags' not in exif_result) or (exif_result['exif_tags'] is None) or \
|
|
776
|
-
(options.exif_datetime_tag not in exif_result['exif_tags']):
|
|
787
|
+
(options.exif_datetime_tag not in exif_result['exif_tags']):
|
|
777
788
|
exif_dt = None
|
|
778
789
|
else:
|
|
779
790
|
exif_dt = exif_result['exif_tags'][options.exif_datetime_tag]
|
|
@@ -783,26 +794,26 @@ def exif_results_to_cct(exif_results,cct_output_file=None,options=None):
|
|
|
783
794
|
images_without_datetime.append(im['file_name'])
|
|
784
795
|
else:
|
|
785
796
|
dt = exif_dt
|
|
786
|
-
|
|
797
|
+
|
|
787
798
|
# An image from the future (or within the last 24 hours) is invalid
|
|
788
799
|
if (now - dt).total_seconds() <= 1*24*60*60:
|
|
789
800
|
print('Warning: datetime for {} is {}'.format(
|
|
790
801
|
im['file_name'],dt))
|
|
791
|
-
im['datetime'] = None
|
|
802
|
+
im['datetime'] = None
|
|
792
803
|
images_with_invalid_datetime.append(im['file_name'])
|
|
793
|
-
|
|
804
|
+
|
|
794
805
|
# An image from before the dawn of time is also invalid
|
|
795
806
|
elif dt.year < options.min_valid_timestamp_year:
|
|
796
807
|
print('Warning: datetime for {} is {}'.format(
|
|
797
808
|
im['file_name'],dt))
|
|
798
809
|
im['datetime'] = None
|
|
799
810
|
images_with_invalid_datetime.append(im['file_name'])
|
|
800
|
-
|
|
811
|
+
|
|
801
812
|
else:
|
|
802
813
|
im['datetime'] = dt
|
|
803
814
|
|
|
804
815
|
image_info.append(im)
|
|
805
|
-
|
|
816
|
+
|
|
806
817
|
# ...for each exif image result
|
|
807
818
|
|
|
808
819
|
print('Parsed EXIF datetime information, unable to parse EXIF date from {} of {} images'.format(
|
|
@@ -813,21 +824,21 @@ def exif_results_to_cct(exif_results,cct_output_file=None,options=None):
|
|
|
813
824
|
d['images'] = image_info
|
|
814
825
|
d['annotations'] = []
|
|
815
826
|
d['categories'] = []
|
|
816
|
-
|
|
827
|
+
|
|
817
828
|
if cct_output_file is not None:
|
|
818
829
|
write_object_with_serialized_datetimes(d,cct_output_file)
|
|
819
|
-
|
|
830
|
+
|
|
820
831
|
return d
|
|
821
832
|
|
|
822
833
|
# ...exif_results_to_cct(...)
|
|
823
834
|
|
|
824
|
-
|
|
835
|
+
|
|
825
836
|
#%% Interactive driver
|
|
826
837
|
|
|
827
838
|
if False:
|
|
828
|
-
|
|
839
|
+
|
|
829
840
|
#%%
|
|
830
|
-
|
|
841
|
+
|
|
831
842
|
input_folder = r'C:\temp\md-name-testing'
|
|
832
843
|
output_file = None # r'C:\temp\md-name-testing\exif.json'
|
|
833
844
|
options = ReadExifOptions()
|
|
@@ -836,30 +847,28 @@ if False:
|
|
|
836
847
|
options.use_threads = False
|
|
837
848
|
options.processing_library = 'pil'
|
|
838
849
|
# options.processing_library = 'exiftool'
|
|
839
|
-
options.tags_to_include = ['DateTime','Model','Make','ExifImageWidth',
|
|
850
|
+
options.tags_to_include = ['DateTime','Model','Make','ExifImageWidth',
|
|
851
|
+
'ExifImageHeight','DateTime','DateTimeOriginal','Orientation']
|
|
840
852
|
# options.tags_to_exclude = ['MakerNote']
|
|
841
|
-
|
|
853
|
+
|
|
842
854
|
results = read_exif_from_folder(input_folder,output_file,options)
|
|
843
855
|
|
|
844
856
|
#%%
|
|
845
|
-
|
|
857
|
+
|
|
846
858
|
with open(output_file,'r') as f:
|
|
847
859
|
d = json.load(f)
|
|
848
|
-
|
|
849
860
|
|
|
850
|
-
#%% Command-line driver
|
|
851
861
|
|
|
852
|
-
|
|
853
|
-
import sys
|
|
862
|
+
#%% Command-line driver
|
|
854
863
|
|
|
855
|
-
def main():
|
|
864
|
+
def main(): # noqa
|
|
856
865
|
|
|
857
866
|
options = ReadExifOptions()
|
|
858
|
-
|
|
867
|
+
|
|
859
868
|
parser = argparse.ArgumentParser(description=('Read EXIF information from all images in' + \
|
|
860
869
|
' a folder, and write the results to .csv or .json'))
|
|
861
870
|
|
|
862
|
-
parser.add_argument('input_folder', type=str,
|
|
871
|
+
parser.add_argument('input_folder', type=str,
|
|
863
872
|
help='Folder of images from which we should read EXIF information')
|
|
864
873
|
parser.add_argument('output_file', type=str,
|
|
865
874
|
help='Output file (.json) to which we should write EXIF information')
|
|
@@ -869,16 +878,16 @@ def main():
|
|
|
869
878
|
help='Use threads (instead of processes) for multitasking')
|
|
870
879
|
parser.add_argument('--processing_library', type=str, default=options.processing_library,
|
|
871
880
|
help='Processing library (exif or pil)')
|
|
872
|
-
|
|
881
|
+
|
|
873
882
|
if len(sys.argv[1:]) == 0:
|
|
874
883
|
parser.print_help()
|
|
875
884
|
parser.exit()
|
|
876
885
|
|
|
877
|
-
args = parser.parse_args()
|
|
886
|
+
args = parser.parse_args()
|
|
878
887
|
args_to_object(args, options)
|
|
879
888
|
options.processing_library = options.processing_library.lower()
|
|
880
|
-
|
|
889
|
+
|
|
881
890
|
read_exif_from_folder(args.input_folder,args.output_file,options)
|
|
882
|
-
|
|
891
|
+
|
|
883
892
|
if __name__ == '__main__':
|
|
884
893
|
main()
|