megadetector 5.0.28__py3-none-any.whl → 5.0.29__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of megadetector might be problematic. Click here for more details.
- megadetector/api/batch_processing/api_core/batch_service/score.py +4 -5
- megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +1 -1
- megadetector/api/batch_processing/api_support/summarize_daily_activity.py +1 -1
- megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
- megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
- megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
- megadetector/api/synchronous/api_core/tests/load_test.py +2 -3
- megadetector/classification/aggregate_classifier_probs.py +3 -3
- megadetector/classification/analyze_failed_images.py +5 -5
- megadetector/classification/cache_batchapi_outputs.py +5 -5
- megadetector/classification/create_classification_dataset.py +11 -12
- megadetector/classification/crop_detections.py +10 -10
- megadetector/classification/csv_to_json.py +8 -8
- megadetector/classification/detect_and_crop.py +13 -15
- megadetector/classification/evaluate_model.py +7 -7
- megadetector/classification/identify_mislabeled_candidates.py +6 -6
- megadetector/classification/json_to_azcopy_list.py +1 -1
- megadetector/classification/json_validator.py +29 -32
- megadetector/classification/map_classification_categories.py +9 -9
- megadetector/classification/merge_classification_detection_output.py +12 -9
- megadetector/classification/prepare_classification_script.py +19 -19
- megadetector/classification/prepare_classification_script_mc.py +23 -23
- megadetector/classification/run_classifier.py +4 -4
- megadetector/classification/save_mislabeled.py +6 -6
- megadetector/classification/train_classifier.py +1 -1
- megadetector/classification/train_classifier_tf.py +9 -9
- megadetector/classification/train_utils.py +10 -10
- megadetector/data_management/annotations/annotation_constants.py +1 -1
- megadetector/data_management/camtrap_dp_to_coco.py +45 -45
- megadetector/data_management/cct_json_utils.py +101 -101
- megadetector/data_management/cct_to_md.py +49 -49
- megadetector/data_management/cct_to_wi.py +33 -33
- megadetector/data_management/coco_to_labelme.py +75 -75
- megadetector/data_management/coco_to_yolo.py +189 -189
- megadetector/data_management/databases/add_width_and_height_to_db.py +3 -2
- megadetector/data_management/databases/combine_coco_camera_traps_files.py +38 -38
- megadetector/data_management/databases/integrity_check_json_db.py +202 -188
- megadetector/data_management/databases/subset_json_db.py +33 -33
- megadetector/data_management/generate_crops_from_cct.py +38 -38
- megadetector/data_management/get_image_sizes.py +54 -49
- megadetector/data_management/labelme_to_coco.py +130 -124
- megadetector/data_management/labelme_to_yolo.py +78 -72
- megadetector/data_management/lila/create_lila_blank_set.py +81 -83
- megadetector/data_management/lila/create_lila_test_set.py +32 -31
- megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
- megadetector/data_management/lila/download_lila_subset.py +21 -24
- megadetector/data_management/lila/generate_lila_per_image_labels.py +91 -91
- megadetector/data_management/lila/get_lila_annotation_counts.py +30 -30
- megadetector/data_management/lila/get_lila_image_counts.py +22 -22
- megadetector/data_management/lila/lila_common.py +70 -70
- megadetector/data_management/lila/test_lila_metadata_urls.py +13 -14
- megadetector/data_management/mewc_to_md.py +339 -340
- megadetector/data_management/ocr_tools.py +258 -252
- megadetector/data_management/read_exif.py +231 -224
- megadetector/data_management/remap_coco_categories.py +26 -26
- megadetector/data_management/remove_exif.py +31 -20
- megadetector/data_management/rename_images.py +187 -187
- megadetector/data_management/resize_coco_dataset.py +41 -41
- megadetector/data_management/speciesnet_to_md.py +41 -41
- megadetector/data_management/wi_download_csv_to_coco.py +55 -55
- megadetector/data_management/yolo_output_to_md_output.py +117 -120
- megadetector/data_management/yolo_to_coco.py +195 -188
- megadetector/detection/change_detection.py +831 -0
- megadetector/detection/process_video.py +340 -337
- megadetector/detection/pytorch_detector.py +304 -262
- megadetector/detection/run_detector.py +177 -164
- megadetector/detection/run_detector_batch.py +364 -363
- megadetector/detection/run_inference_with_yolov5_val.py +328 -325
- megadetector/detection/run_tiled_inference.py +256 -249
- megadetector/detection/tf_detector.py +24 -24
- megadetector/detection/video_utils.py +290 -282
- megadetector/postprocessing/add_max_conf.py +15 -11
- megadetector/postprocessing/categorize_detections_by_size.py +44 -44
- megadetector/postprocessing/classification_postprocessing.py +415 -415
- megadetector/postprocessing/combine_batch_outputs.py +20 -21
- megadetector/postprocessing/compare_batch_results.py +528 -517
- megadetector/postprocessing/convert_output_format.py +97 -97
- megadetector/postprocessing/create_crop_folder.py +219 -146
- megadetector/postprocessing/detector_calibration.py +173 -168
- megadetector/postprocessing/generate_csv_report.py +508 -499
- megadetector/postprocessing/load_api_results.py +23 -20
- megadetector/postprocessing/md_to_coco.py +129 -98
- megadetector/postprocessing/md_to_labelme.py +89 -83
- megadetector/postprocessing/md_to_wi.py +40 -40
- megadetector/postprocessing/merge_detections.py +87 -114
- megadetector/postprocessing/postprocess_batch_results.py +313 -298
- megadetector/postprocessing/remap_detection_categories.py +36 -36
- megadetector/postprocessing/render_detection_confusion_matrix.py +205 -199
- megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
- megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +702 -677
- megadetector/postprocessing/separate_detections_into_folders.py +226 -211
- megadetector/postprocessing/subset_json_detector_output.py +265 -262
- megadetector/postprocessing/top_folders_to_bottom.py +45 -45
- megadetector/postprocessing/validate_batch_results.py +70 -70
- megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +15 -15
- megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +14 -14
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +66 -66
- megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
- megadetector/taxonomy_mapping/simple_image_download.py +8 -8
- megadetector/taxonomy_mapping/species_lookup.py +33 -33
- megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
- megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
- megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
- megadetector/utils/azure_utils.py +22 -22
- megadetector/utils/ct_utils.py +1018 -200
- megadetector/utils/directory_listing.py +21 -77
- megadetector/utils/gpu_test.py +22 -22
- megadetector/utils/md_tests.py +541 -518
- megadetector/utils/path_utils.py +1457 -398
- megadetector/utils/process_utils.py +41 -41
- megadetector/utils/sas_blob_utils.py +53 -49
- megadetector/utils/split_locations_into_train_val.py +61 -61
- megadetector/utils/string_utils.py +147 -26
- megadetector/utils/url_utils.py +463 -173
- megadetector/utils/wi_utils.py +2629 -2526
- megadetector/utils/write_html_image_list.py +137 -137
- megadetector/visualization/plot_utils.py +21 -21
- megadetector/visualization/render_images_with_thumbnails.py +37 -73
- megadetector/visualization/visualization_utils.py +401 -397
- megadetector/visualization/visualize_db.py +197 -190
- megadetector/visualization/visualize_detector_output.py +79 -73
- {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/METADATA +135 -132
- megadetector-5.0.29.dist-info/RECORD +163 -0
- {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/WHEEL +1 -1
- {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/licenses/LICENSE +0 -0
- {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/top_level.txt +0 -0
- megadetector/data_management/importers/add_nacti_sizes.py +0 -52
- megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
- megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
- megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
- megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
- megadetector/data_management/importers/awc_to_json.py +0 -191
- megadetector/data_management/importers/bellevue_to_json.py +0 -272
- megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
- megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
- megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
- megadetector/data_management/importers/cct_field_adjustments.py +0 -58
- megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
- megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
- megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
- megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
- megadetector/data_management/importers/ena24_to_json.py +0 -276
- megadetector/data_management/importers/filenames_to_json.py +0 -386
- megadetector/data_management/importers/helena_to_cct.py +0 -283
- megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
- megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
- megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
- megadetector/data_management/importers/jb_csv_to_json.py +0 -150
- megadetector/data_management/importers/mcgill_to_json.py +0 -250
- megadetector/data_management/importers/missouri_to_json.py +0 -490
- megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
- megadetector/data_management/importers/noaa_seals_2019.py +0 -181
- megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
- megadetector/data_management/importers/pc_to_json.py +0 -365
- megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
- megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
- megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
- megadetector/data_management/importers/rspb_to_json.py +0 -356
- megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
- megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
- megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
- megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
- megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
- megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
- megadetector/data_management/importers/sulross_get_exif.py +0 -65
- megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
- megadetector/data_management/importers/ubc_to_json.py +0 -399
- megadetector/data_management/importers/umn_to_json.py +0 -507
- megadetector/data_management/importers/wellington_to_json.py +0 -263
- megadetector/data_management/importers/wi_to_json.py +0 -442
- megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
- megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
- megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
- megadetector-5.0.28.dist-info/RECORD +0 -209
|
@@ -2,8 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
read_exif.py
|
|
4
4
|
|
|
5
|
-
Given a folder of images, reads relevant metadata (EXIF/IPTC/XMP) fields from all images,
|
|
6
|
-
and writes them to a .json or .csv file.
|
|
5
|
+
Given a folder of images, reads relevant metadata (EXIF/IPTC/XMP) fields from all images,
|
|
6
|
+
and writes them to a .json or .csv file.
|
|
7
7
|
|
|
8
8
|
This module can use either PIL (which can only reliably read EXIF data) or exiftool (which
|
|
9
9
|
can read everything). The latter approach expects that exiftool is available on the system
|
|
@@ -16,6 +16,9 @@ path. No attempt is made to be consistent in format across the two approaches.
|
|
|
16
16
|
import os
|
|
17
17
|
import subprocess
|
|
18
18
|
import json
|
|
19
|
+
import argparse
|
|
20
|
+
import sys
|
|
21
|
+
|
|
19
22
|
from datetime import datetime
|
|
20
23
|
|
|
21
24
|
from multiprocessing.pool import ThreadPool as ThreadPool
|
|
@@ -23,6 +26,7 @@ from multiprocessing.pool import Pool as Pool
|
|
|
23
26
|
|
|
24
27
|
from tqdm import tqdm
|
|
25
28
|
from PIL import Image, ExifTags
|
|
29
|
+
from functools import partial
|
|
26
30
|
|
|
27
31
|
from megadetector.utils.path_utils import find_images, is_executable
|
|
28
32
|
from megadetector.utils.ct_utils import args_to_object
|
|
@@ -31,7 +35,8 @@ from megadetector.data_management.cct_json_utils import write_object_with_serial
|
|
|
31
35
|
|
|
32
36
|
debug_max_images = None
|
|
33
37
|
|
|
34
|
-
minimal_exif_tags =
|
|
38
|
+
minimal_exif_tags = \
|
|
39
|
+
['DateTime','Model','Make','ExifImageWidth','ExifImageHeight','DateTimeOriginal','Orientation']
|
|
35
40
|
|
|
36
41
|
|
|
37
42
|
#%% Options
|
|
@@ -40,85 +45,84 @@ class ReadExifOptions:
|
|
|
40
45
|
"""
|
|
41
46
|
Parameters controlling metadata extraction.
|
|
42
47
|
"""
|
|
43
|
-
|
|
48
|
+
|
|
44
49
|
def __init__(self):
|
|
45
|
-
|
|
50
|
+
|
|
46
51
|
#: Enable additional debug console output
|
|
47
52
|
self.verbose = False
|
|
48
|
-
|
|
53
|
+
|
|
49
54
|
#: If this is True and an output file is specified for read_exif_from_folder,
|
|
50
55
|
#: and we encounter a serialization issue, we'll return the results but won't
|
|
51
|
-
#: error.
|
|
56
|
+
#: error.
|
|
52
57
|
self.allow_write_error = False
|
|
53
|
-
|
|
58
|
+
|
|
54
59
|
#: Number of concurrent workers, set to <= 1 to disable parallelization
|
|
55
60
|
self.n_workers = 1
|
|
56
|
-
|
|
61
|
+
|
|
57
62
|
#: Should we use threads (vs. processes) for parallelization?
|
|
58
63
|
#:
|
|
59
64
|
#: Not relevant if n_workers is <= 1.
|
|
60
65
|
self.use_threads = True
|
|
61
|
-
|
|
62
|
-
#: "File" and "ExifTool" are tag types used by ExifTool to report data that
|
|
66
|
+
|
|
67
|
+
#: "File" and "ExifTool" are tag types used by ExifTool to report data that
|
|
63
68
|
#: doesn't come from EXIF, rather from the file (e.g. file size).
|
|
64
69
|
self.tag_types_to_ignore = set(['File','ExifTool'])
|
|
65
|
-
|
|
70
|
+
|
|
66
71
|
#: Include/exclude specific tags (tags_to_include and tags_to_exclude are mutually incompatible)
|
|
67
72
|
#:
|
|
68
73
|
#: A useful set of tags one might want to limit queries for:
|
|
69
74
|
#:
|
|
70
|
-
#: options.tags_to_include =
|
|
71
|
-
#: 'DateTimeOriginal','Orientation']
|
|
75
|
+
#: options.tags_to_include = minimal_exif_tags
|
|
72
76
|
self.tags_to_include = None
|
|
73
|
-
|
|
77
|
+
|
|
74
78
|
#: Include/exclude specific tags (tags_to_include and tags_to_exclude are mutually incompatible)
|
|
75
79
|
self.tags_to_exclude = None
|
|
76
|
-
|
|
80
|
+
|
|
77
81
|
#: The command line to invoke if using exiftool, can be an absolute path to exiftool.exe, or
|
|
78
82
|
#: can be just "exiftool", in which case it should be on your system path.
|
|
79
83
|
self.exiftool_command_name = 'exiftool'
|
|
80
|
-
|
|
84
|
+
|
|
81
85
|
#: How should we handle byte-formatted EXIF tags?
|
|
82
86
|
#:
|
|
83
87
|
#: 'convert_to_string': convert to a Python string
|
|
84
88
|
#: 'delete': don't include at all
|
|
85
89
|
#: 'raw': include as a byte string
|
|
86
90
|
self.byte_handling = 'convert_to_string' # 'convert_to_string','delete','raw'
|
|
87
|
-
|
|
91
|
+
|
|
88
92
|
#: Should we use exiftool or PIL?
|
|
89
93
|
self.processing_library = 'pil' # 'exiftool','pil'
|
|
90
|
-
|
|
94
|
+
|
|
91
95
|
|
|
92
96
|
class ExifResultsToCCTOptions:
|
|
93
97
|
"""
|
|
94
|
-
Options controlling the behavior of exif_results_to_cct() (which reformats the datetime
|
|
98
|
+
Options controlling the behavior of exif_results_to_cct() (which reformats the datetime
|
|
95
99
|
information) extracted by read_exif_from_folder().
|
|
96
100
|
"""
|
|
97
|
-
|
|
101
|
+
|
|
98
102
|
def __init__(self):
|
|
99
|
-
|
|
103
|
+
|
|
100
104
|
#: Timestamps older than this are assumed to be junk; lots of cameras use a
|
|
101
105
|
#: default time in 2000.
|
|
102
106
|
self.min_valid_timestamp_year = 2001
|
|
103
|
-
|
|
107
|
+
|
|
104
108
|
#: The EXIF tag from which to pull datetime information
|
|
105
109
|
self.exif_datetime_tag = 'DateTimeOriginal'
|
|
106
|
-
|
|
110
|
+
|
|
107
111
|
#: Function for extracting location information, should take a string
|
|
108
112
|
#: and return a string. Defaults to ct_utils.image_file_to_camera_folder. If
|
|
109
113
|
#: this is None, location is written as "unknown".
|
|
110
114
|
self.filename_to_location_function = image_file_to_camera_folder
|
|
111
|
-
|
|
115
|
+
|
|
112
116
|
|
|
113
117
|
#%% Functions
|
|
114
118
|
|
|
115
119
|
def _get_exif_ifd(exif):
|
|
116
120
|
"""
|
|
117
121
|
Read EXIF data from by finding the EXIF offset and reading tags directly
|
|
118
|
-
|
|
122
|
+
|
|
119
123
|
https://github.com/python-pillow/Pillow/issues/5863
|
|
120
124
|
"""
|
|
121
|
-
|
|
125
|
+
|
|
122
126
|
# Find the offset for all the EXIF information
|
|
123
127
|
for key, value in ExifTags.TAGS.items():
|
|
124
128
|
if value == "ExifOffset":
|
|
@@ -132,19 +136,19 @@ def _get_exif_ifd(exif):
|
|
|
132
136
|
|
|
133
137
|
def has_gps_info(im):
|
|
134
138
|
"""
|
|
135
|
-
Given a filename, PIL image, dict of EXIF tags, or dict containing an 'exif_tags' field,
|
|
139
|
+
Given a filename, PIL image, dict of EXIF tags, or dict containing an 'exif_tags' field,
|
|
136
140
|
determine whether GPS location information is present in this image. Does not retrieve
|
|
137
141
|
location info, currently only used to determine whether it's present.
|
|
138
|
-
|
|
142
|
+
|
|
139
143
|
Args:
|
|
140
144
|
im (str, PIL.Image.Image, dict): image for which we should determine GPS metadata
|
|
141
145
|
presence
|
|
142
|
-
|
|
146
|
+
|
|
143
147
|
Returns:
|
|
144
148
|
bool: whether GPS metadata is present, or None if we failed to read EXIF data from
|
|
145
149
|
a file.
|
|
146
150
|
"""
|
|
147
|
-
|
|
151
|
+
|
|
148
152
|
if isinstance(im,str) or isinstance(im,Image.Image):
|
|
149
153
|
exif_tags = read_pil_exif(im)
|
|
150
154
|
if exif_tags is None:
|
|
@@ -153,57 +157,57 @@ def has_gps_info(im):
|
|
|
153
157
|
else:
|
|
154
158
|
assert isinstance(im,dict)
|
|
155
159
|
exif_tags = im
|
|
156
|
-
|
|
160
|
+
|
|
157
161
|
if 'exif_tags' in exif_tags:
|
|
158
162
|
exif_tags = exif_tags['exif_tags']
|
|
159
163
|
if exif_tags is None:
|
|
160
164
|
return None
|
|
161
|
-
|
|
165
|
+
|
|
162
166
|
if 'GPSInfo' in exif_tags and \
|
|
163
167
|
exif_tags['GPSInfo'] is not None and \
|
|
164
168
|
isinstance(exif_tags['GPSInfo'],dict):
|
|
165
|
-
|
|
169
|
+
|
|
166
170
|
# Don't indicate that GPS data is present if only GPS version info is present
|
|
167
171
|
if ('GPSLongitude' in exif_tags['GPSInfo']) or ('GPSLatitude' in exif_tags['GPSInfo']):
|
|
168
172
|
return True
|
|
169
173
|
return False
|
|
170
|
-
|
|
174
|
+
|
|
171
175
|
return False
|
|
172
|
-
|
|
173
|
-
# ...def has_gps_info(...)
|
|
176
|
+
|
|
177
|
+
# ...def has_gps_info(...)
|
|
174
178
|
|
|
175
179
|
|
|
176
180
|
def read_pil_exif(im,options=None):
|
|
177
181
|
"""
|
|
178
182
|
Read all the EXIF data we know how to read from an image, using PIL. This is primarily
|
|
179
|
-
an internal function; the main entry point for single-image EXIF information is
|
|
183
|
+
an internal function; the main entry point for single-image EXIF information is
|
|
180
184
|
read_exif_tags_for_image().
|
|
181
|
-
|
|
185
|
+
|
|
182
186
|
Args:
|
|
183
|
-
im (str or PIL.Image.Image): image (as a filename or an Image object) from which
|
|
187
|
+
im (str or PIL.Image.Image): image (as a filename or an Image object) from which
|
|
184
188
|
we should read EXIF data.
|
|
185
|
-
|
|
189
|
+
|
|
186
190
|
Returns:
|
|
187
191
|
dict: a dictionary mapping EXIF tag names to their values
|
|
188
192
|
"""
|
|
189
|
-
|
|
193
|
+
|
|
190
194
|
if options is None:
|
|
191
195
|
options = ReadExifOptions()
|
|
192
|
-
|
|
196
|
+
|
|
193
197
|
image_name = '[image]'
|
|
194
198
|
if isinstance(im,str):
|
|
195
199
|
image_name = im
|
|
196
200
|
im = Image.open(im)
|
|
197
|
-
|
|
201
|
+
|
|
198
202
|
exif_tags = {}
|
|
199
203
|
try:
|
|
200
204
|
exif_info = im.getexif()
|
|
201
205
|
except Exception:
|
|
202
206
|
exif_info = None
|
|
203
|
-
|
|
207
|
+
|
|
204
208
|
if exif_info is None:
|
|
205
209
|
return exif_tags
|
|
206
|
-
|
|
210
|
+
|
|
207
211
|
for k, v in exif_info.items():
|
|
208
212
|
assert isinstance(k,str) or isinstance(k,int), \
|
|
209
213
|
'Invalid EXIF key {}'.format(str(k))
|
|
@@ -212,9 +216,9 @@ def read_pil_exif(im,options=None):
|
|
|
212
216
|
else:
|
|
213
217
|
# print('Warning: unrecognized EXIF tag: {}'.format(k))
|
|
214
218
|
exif_tags[k] = str(v)
|
|
215
|
-
|
|
219
|
+
|
|
216
220
|
exif_ifd_tags = _get_exif_ifd(exif_info)
|
|
217
|
-
|
|
221
|
+
|
|
218
222
|
for k in exif_ifd_tags.keys():
|
|
219
223
|
v = exif_ifd_tags[k]
|
|
220
224
|
if k in exif_tags:
|
|
@@ -223,16 +227,16 @@ def read_pil_exif(im,options=None):
|
|
|
223
227
|
k,image_name,exif_tags[k],v))
|
|
224
228
|
else:
|
|
225
229
|
exif_tags[k] = v
|
|
226
|
-
|
|
230
|
+
|
|
227
231
|
exif_tag_names = list(exif_tags.keys())
|
|
228
|
-
|
|
232
|
+
|
|
229
233
|
# Type conversion and cleanup
|
|
230
|
-
#
|
|
234
|
+
#
|
|
231
235
|
# Most quirky types will get serialized to string when we write to .json.
|
|
232
236
|
for k in exif_tag_names:
|
|
233
|
-
|
|
237
|
+
|
|
234
238
|
if isinstance(exif_tags[k],bytes):
|
|
235
|
-
|
|
239
|
+
|
|
236
240
|
if options.byte_handling == 'delete':
|
|
237
241
|
del exif_tags[k]
|
|
238
242
|
elif options.byte_handling == 'raw':
|
|
@@ -240,24 +244,24 @@ def read_pil_exif(im,options=None):
|
|
|
240
244
|
else:
|
|
241
245
|
assert options.byte_handling == 'convert_to_string'
|
|
242
246
|
exif_tags[k] = str(exif_tags[k])
|
|
243
|
-
|
|
247
|
+
|
|
244
248
|
elif isinstance(exif_tags[k],str):
|
|
245
|
-
|
|
249
|
+
|
|
246
250
|
exif_tags[k] = exif_tags[k].strip()
|
|
247
|
-
|
|
248
|
-
# Special case for GPS info... I could decode other encoded tags, but GPS info is
|
|
251
|
+
|
|
252
|
+
# Special case for GPS info... I could decode other encoded tags, but GPS info is
|
|
249
253
|
# particularly important, so I'm only doing that for now.
|
|
250
254
|
if 'GPSInfo' in exif_tags:
|
|
251
|
-
|
|
255
|
+
|
|
252
256
|
try:
|
|
253
|
-
|
|
257
|
+
|
|
254
258
|
# Find the tag number for GPS info, in practice should alays be 34853
|
|
255
|
-
|
|
256
|
-
assert
|
|
257
|
-
|
|
259
|
+
gpsinfo_tag = next(tag for tag, name in ExifTags.TAGS.items() if name == "GPSInfo")
|
|
260
|
+
assert gpsinfo_tag == 34853
|
|
261
|
+
|
|
258
262
|
# These are integer keys, e.g. {7: (14.0, 27.0, 7.24)}
|
|
259
|
-
gps_info_raw = exif_info.get_ifd(
|
|
260
|
-
|
|
263
|
+
gps_info_raw = exif_info.get_ifd(gpsinfo_tag)
|
|
264
|
+
|
|
261
265
|
# Convert to strings, e.g. 'GPSTimeStamp'
|
|
262
266
|
gps_info = {}
|
|
263
267
|
for int_tag,v in enumerate(gps_info_raw.keys()):
|
|
@@ -266,15 +270,15 @@ def read_pil_exif(im,options=None):
|
|
|
266
270
|
gps_info[ExifTags.GPSTAGS[int_tag]] = v
|
|
267
271
|
else:
|
|
268
272
|
gps_info[int_tag] = v
|
|
269
|
-
|
|
273
|
+
|
|
270
274
|
exif_tags['GPSInfo'] = gps_info
|
|
271
|
-
|
|
275
|
+
|
|
272
276
|
except Exception as e:
|
|
273
277
|
if options.verbose:
|
|
274
278
|
print('Warning: error reading GPS info: {}'.format(str(e)))
|
|
275
|
-
|
|
279
|
+
|
|
276
280
|
# ...if we think there might be GPS tags in this image
|
|
277
|
-
|
|
281
|
+
|
|
278
282
|
return exif_tags
|
|
279
283
|
|
|
280
284
|
# ...read_pil_exif()
|
|
@@ -285,26 +289,26 @@ def format_datetime_as_exif_datetime_string(dt):
|
|
|
285
289
|
Returns a Python datetime object rendered using the standard EXIF datetime
|
|
286
290
|
string format ('%Y:%m:%d %H:%M:%S')
|
|
287
291
|
"""
|
|
288
|
-
|
|
292
|
+
|
|
289
293
|
return datetime.strftime(dt, '%Y:%m:%d %H:%M:%S')
|
|
290
|
-
|
|
294
|
+
|
|
291
295
|
|
|
292
296
|
def parse_exif_datetime_string(s,verbose=False):
|
|
293
297
|
""""
|
|
294
|
-
Exif datetimes are strings, but in a standard format:
|
|
295
|
-
|
|
298
|
+
Exif datetimes are strings, but in a standard format:
|
|
299
|
+
|
|
296
300
|
%Y:%m:%d %H:%M:%S
|
|
297
|
-
|
|
301
|
+
|
|
298
302
|
Parses one of those strings into a Python datetime object.
|
|
299
|
-
|
|
303
|
+
|
|
300
304
|
Args:
|
|
301
305
|
s (str): datetime string to parse, should be in standard EXIF datetime format
|
|
302
306
|
verbose (bool, optional): enable additional debug output
|
|
303
|
-
|
|
307
|
+
|
|
304
308
|
Returns:
|
|
305
309
|
datetime: the datetime object created from [s]
|
|
306
310
|
"""
|
|
307
|
-
|
|
311
|
+
|
|
308
312
|
dt = None
|
|
309
313
|
try:
|
|
310
314
|
dt = datetime.strptime(s, '%Y:%m:%d %H:%M:%S')
|
|
@@ -319,7 +323,7 @@ def _filter_tags(tags,options):
|
|
|
319
323
|
Internal function used to include/exclude specific tags from the exif_tags
|
|
320
324
|
dict.
|
|
321
325
|
"""
|
|
322
|
-
|
|
326
|
+
|
|
323
327
|
if options is None:
|
|
324
328
|
return tags
|
|
325
329
|
if options.tags_to_include is None and options.tags_to_exclude is None:
|
|
@@ -343,23 +347,23 @@ def _filter_tags(tags,options):
|
|
|
343
347
|
def read_exif_tags_for_image(file_path,options=None):
|
|
344
348
|
"""
|
|
345
349
|
Get relevant fields from EXIF data for an image
|
|
346
|
-
|
|
350
|
+
|
|
347
351
|
Returns:
|
|
348
|
-
dict: a dict with fields 'status' (str) and 'tags'. The exact format of 'tags' depends on
|
|
352
|
+
dict: a dict with fields 'status' (str) and 'tags'. The exact format of 'tags' depends on
|
|
349
353
|
options (ReadExifOptions, optional): parameters controlling metadata extraction
|
|
350
354
|
options.processing_library:
|
|
351
|
-
|
|
355
|
+
|
|
352
356
|
- For exiftool, 'tags' is a list of lists, where each element is (type/tag/value)
|
|
353
357
|
- For PIL, 'tags' is a dict (str:str)
|
|
354
358
|
"""
|
|
355
|
-
|
|
359
|
+
|
|
356
360
|
if options is None:
|
|
357
361
|
options = ReadExifOptions()
|
|
358
|
-
|
|
362
|
+
|
|
359
363
|
result = {'status':'unknown','tags':[]}
|
|
360
|
-
|
|
364
|
+
|
|
361
365
|
if options.processing_library == 'pil':
|
|
362
|
-
|
|
366
|
+
|
|
363
367
|
try:
|
|
364
368
|
exif_tags = read_pil_exif(file_path,options)
|
|
365
369
|
|
|
@@ -369,18 +373,18 @@ def read_exif_tags_for_image(file_path,options=None):
|
|
|
369
373
|
file_path,str(e)))
|
|
370
374
|
result['status'] = 'read_failure'
|
|
371
375
|
result['error'] = str(e)
|
|
372
|
-
|
|
376
|
+
|
|
373
377
|
if result['status'] == 'unknown':
|
|
374
|
-
if exif_tags is None:
|
|
378
|
+
if exif_tags is None:
|
|
375
379
|
result['status'] = 'empty_read'
|
|
376
380
|
else:
|
|
377
381
|
result['status'] = 'success'
|
|
378
382
|
result['tags'] = _filter_tags(exif_tags,options)
|
|
379
|
-
|
|
383
|
+
|
|
380
384
|
return result
|
|
381
|
-
|
|
385
|
+
|
|
382
386
|
elif options.processing_library == 'exiftool':
|
|
383
|
-
|
|
387
|
+
|
|
384
388
|
# -G means "Print group name for each tag", e.g. print:
|
|
385
389
|
#
|
|
386
390
|
# [File] Bits Per Sample : 8
|
|
@@ -390,95 +394,95 @@ def read_exif_tags_for_image(file_path,options=None):
|
|
|
390
394
|
# Bits Per Sample : 8
|
|
391
395
|
proc = subprocess.Popen([options.exiftool_command_name, '-G', file_path],
|
|
392
396
|
stdout=subprocess.PIPE, encoding='utf8')
|
|
393
|
-
|
|
394
|
-
exif_lines = proc.stdout.readlines()
|
|
397
|
+
|
|
398
|
+
exif_lines = proc.stdout.readlines()
|
|
395
399
|
exif_lines = [s.strip() for s in exif_lines]
|
|
396
400
|
if ( (exif_lines is None) or (len(exif_lines) == 0) or not \
|
|
397
401
|
any([s.lower().startswith('[exif]') for s in exif_lines])):
|
|
398
402
|
result['status'] = 'failure'
|
|
399
403
|
return result
|
|
400
|
-
|
|
404
|
+
|
|
401
405
|
# A list of three-element lists (type/tag/value)
|
|
402
406
|
exif_tags = []
|
|
403
|
-
|
|
407
|
+
|
|
404
408
|
# line_raw = exif_lines[0]
|
|
405
409
|
for line_raw in exif_lines:
|
|
406
|
-
|
|
410
|
+
|
|
407
411
|
# A typical line:
|
|
408
412
|
#
|
|
409
413
|
# [ExifTool] ExifTool Version Number : 12.13
|
|
410
|
-
|
|
414
|
+
|
|
411
415
|
line = line_raw.strip()
|
|
412
|
-
|
|
416
|
+
|
|
413
417
|
# Split on the first occurrence of ":"
|
|
414
418
|
tokens = line.split(':',1)
|
|
415
419
|
assert(len(tokens) == 2), 'EXIF tokenization failure ({} tokens, expected 2)'.format(
|
|
416
420
|
len(tokens))
|
|
417
|
-
|
|
418
|
-
field_value = tokens[1].strip()
|
|
419
|
-
|
|
420
|
-
field_name_type = tokens[0].strip()
|
|
421
|
+
|
|
422
|
+
field_value = tokens[1].strip()
|
|
423
|
+
|
|
424
|
+
field_name_type = tokens[0].strip()
|
|
421
425
|
field_name_type_tokens = field_name_type.split(None,1)
|
|
422
426
|
assert len(field_name_type_tokens) == 2, 'EXIF tokenization failure'
|
|
423
|
-
|
|
427
|
+
|
|
424
428
|
field_type = field_name_type_tokens[0].strip()
|
|
425
429
|
assert field_type.startswith('[') and field_type.endswith(']'), \
|
|
426
430
|
'Invalid EXIF field {}'.format(field_type)
|
|
427
431
|
field_type = field_type[1:-1]
|
|
428
|
-
|
|
432
|
+
|
|
429
433
|
if field_type in options.tag_types_to_ignore:
|
|
430
434
|
if options.verbose:
|
|
431
435
|
print('Ignoring tag with type {}'.format(field_type))
|
|
432
|
-
continue
|
|
433
|
-
|
|
436
|
+
continue
|
|
437
|
+
|
|
434
438
|
field_name = field_name_type_tokens[1].strip()
|
|
435
439
|
if options.tags_to_exclude is not None and field_name in options.tags_to_exclude:
|
|
436
440
|
continue
|
|
437
441
|
if options.tags_to_include is not None and field_name not in options.tags_to_include:
|
|
438
442
|
continue
|
|
439
443
|
tag = [field_type,field_name,field_value]
|
|
440
|
-
|
|
444
|
+
|
|
441
445
|
exif_tags.append(tag)
|
|
442
|
-
|
|
446
|
+
|
|
443
447
|
# ...for each output line
|
|
444
|
-
|
|
448
|
+
|
|
445
449
|
result['status'] = 'success'
|
|
446
450
|
result['tags'] = exif_tags
|
|
447
451
|
return result
|
|
448
|
-
|
|
452
|
+
|
|
449
453
|
else:
|
|
450
|
-
|
|
454
|
+
|
|
451
455
|
raise ValueError('Unknown processing library {}'.format(
|
|
452
456
|
options.processing_library))
|
|
453
457
|
|
|
454
458
|
# ...which processing library are we using?
|
|
455
|
-
|
|
459
|
+
|
|
456
460
|
# ...read_exif_tags_for_image()
|
|
457
461
|
|
|
458
462
|
|
|
459
463
|
def _populate_exif_data(im, image_base, options=None):
|
|
460
464
|
"""
|
|
461
465
|
Populate EXIF data into the 'exif_tags' field in the image object [im].
|
|
462
|
-
|
|
466
|
+
|
|
463
467
|
im['file_name'] should be prepopulated, relative to image_base.
|
|
464
|
-
|
|
468
|
+
|
|
465
469
|
Returns a modified version of [im], also modifies [im] in place.
|
|
466
470
|
"""
|
|
467
|
-
|
|
471
|
+
|
|
468
472
|
if options is None:
|
|
469
473
|
options = ReadExifOptions()
|
|
470
474
|
|
|
471
475
|
fn = im['file_name']
|
|
472
476
|
if options.verbose:
|
|
473
477
|
print('Processing {}'.format(fn))
|
|
474
|
-
|
|
478
|
+
|
|
475
479
|
try:
|
|
476
|
-
|
|
480
|
+
|
|
477
481
|
file_path = os.path.join(image_base,fn)
|
|
478
482
|
assert os.path.isfile(file_path), 'Could not find file {}'.format(file_path)
|
|
479
483
|
result = read_exif_tags_for_image(file_path,options)
|
|
480
484
|
if result['status'] == 'success':
|
|
481
|
-
exif_tags = result['tags']
|
|
485
|
+
exif_tags = result['tags']
|
|
482
486
|
im['exif_tags'] = exif_tags
|
|
483
487
|
else:
|
|
484
488
|
im['exif_tags'] = None
|
|
@@ -487,15 +491,15 @@ def _populate_exif_data(im, image_base, options=None):
|
|
|
487
491
|
im['error'] = result['error']
|
|
488
492
|
if options.verbose:
|
|
489
493
|
print('Error reading EXIF data for {}'.format(file_path))
|
|
490
|
-
|
|
494
|
+
|
|
491
495
|
except Exception as e:
|
|
492
|
-
|
|
496
|
+
|
|
493
497
|
s = 'Error on {}: {}'.format(fn,str(e))
|
|
494
498
|
print(s)
|
|
495
499
|
im['error'] = s
|
|
496
500
|
im['status'] = 'read failure'
|
|
497
501
|
im['exif_tags'] = None
|
|
498
|
-
|
|
502
|
+
|
|
499
503
|
return im
|
|
500
504
|
|
|
501
505
|
# ..._populate_exif_data()
|
|
@@ -503,67 +507,72 @@ def _populate_exif_data(im, image_base, options=None):
|
|
|
503
507
|
|
|
504
508
|
def _create_image_objects(image_files,recursive=True):
    """
    Build one bare image record (a dict holding only 'file_name') per input image.

    Args:
        image_files (str or list): either a folder name, in which case the folder
            is enumerated via find_images() with relative paths requested, or an
            iterable of relative paths, which are stored exactly as supplied (so
            the base path is irrelevant here).
        recursive (bool, optional): forwarded to find_images() when [image_files]
            is a folder; ignored when [image_files] is a list.

    Returns:
        list: list of dicts, each with the single field 'file_name' (a relative path).
        If debug_max_images (defined outside this function) is not None, the list
        is truncated to that many entries.
    """

    # A string is interpreted as a folder to enumerate; anything else is assumed
    # to already be a collection of relative paths.
    if isinstance(image_files,str):
        print('Enumerating image files in {}'.format(image_files))
        assert os.path.isdir(image_files), 'Invalid image folder {}'.format(image_files)
        image_files = find_images(image_files,
                                  recursive=recursive,
                                  return_relative_paths=True,
                                  convert_slashes=True)

    images = [{'file_name':relative_path} for relative_path in image_files]

    # Optional debugging aid: only keep the first few images
    if debug_max_images is not None:
        print('Trimming input list to {} images'.format(debug_max_images))
        images = images[:debug_max_images]

    return images
|
|
535
539
|
|
|
536
540
|
|
|
537
541
|
def _populate_exif_for_images(image_base,images,options=None):
    """
    Main worker loop: read EXIF data for each image object in [images].

    When options.n_workers is 1, images are processed serially in the calling
    process.  Otherwise a pool with options.n_workers workers is created (a thread
    pool if options.use_threads is set, a process pool if not), and that pool is
    closed and joined before this function returns or raises.

    Callers should use the returned list rather than rely on [images] having been
    modified: with a process pool, each worker receives a pickled copy of the image
    dict, so the dicts in [images] are not updated in the calling process.

    Args:
        image_base (str): folder that the 'file_name' fields are relative to
        images (list): list of dicts with the field 'file_name' containing
            a relative path (relative to 'image_base')
        options (ReadExifOptions, optional): processing options; a default
            ReadExifOptions() is used if this is None

    Returns:
        list: the values returned by _populate_exif_data, one per image, in the
        same order as [images]
    """

    if options is None:
        options = ReadExifOptions()

    if options.n_workers == 1:

        results = []
        for im in tqdm(images):
            results.append(_populate_exif_data(im,image_base,options))

    else:

        pool = None
        try:
            if options.use_threads:
                print('Starting parallel thread pool with {} workers'.format(options.n_workers))
                pool = ThreadPool(options.n_workers)
            else:
                print('Starting parallel process pool with {} workers'.format(options.n_workers))
                pool = Pool(options.n_workers)

            results = list(tqdm(pool.imap(partial(_populate_exif_data,image_base=image_base,
                                    options=options),images),total=len(images)))
        finally:
            # Pool construction itself can fail (e.g. an invalid worker count), in
            # which case there is nothing to clean up; touching [pool] here would
            # raise AttributeError and hide the original exception.
            if pool is not None:
                pool.close()
                pool.join()
                print("Pool closed and joined for EXIF extraction")

    return results
|
|
569
578
|
|
|
@@ -571,23 +580,23 @@ def _populate_exif_for_images(image_base,images,options=None):
|
|
|
571
580
|
def _write_exif_results(results,output_file):
|
|
572
581
|
"""
|
|
573
582
|
Write EXIF information to [output_file].
|
|
574
|
-
|
|
583
|
+
|
|
575
584
|
'results' is a list of dicts with fields 'exif_tags' and 'file_name'.
|
|
576
585
|
|
|
577
|
-
Writes to .csv or .json depending on the extension of 'output_file'.
|
|
586
|
+
Writes to .csv or .json depending on the extension of 'output_file'.
|
|
578
587
|
"""
|
|
579
|
-
|
|
588
|
+
|
|
580
589
|
if output_file.endswith('.json'):
|
|
581
|
-
|
|
590
|
+
|
|
582
591
|
with open(output_file,'w') as f:
|
|
583
592
|
json.dump(results,f,indent=1,default=str)
|
|
584
|
-
|
|
593
|
+
|
|
585
594
|
elif output_file.endswith('.csv'):
|
|
586
|
-
|
|
595
|
+
|
|
587
596
|
# Find all EXIF tags that exist in any image
|
|
588
597
|
all_keys = set()
|
|
589
598
|
for im in results:
|
|
590
|
-
|
|
599
|
+
|
|
591
600
|
keys_this_image = set()
|
|
592
601
|
exif_tags = im['exif_tags']
|
|
593
602
|
file_name = im['file_name']
|
|
@@ -597,51 +606,51 @@ def _write_exif_results(results,output_file):
|
|
|
597
606
|
'Error: tag {} appears twice in image {}'.format(
|
|
598
607
|
tag_name,file_name)
|
|
599
608
|
all_keys.add(tag_name)
|
|
600
|
-
|
|
609
|
+
|
|
601
610
|
# ...for each tag in this image
|
|
602
|
-
|
|
611
|
+
|
|
603
612
|
# ...for each image
|
|
604
|
-
|
|
613
|
+
|
|
605
614
|
all_keys = sorted(list(all_keys))
|
|
606
|
-
|
|
615
|
+
|
|
607
616
|
header = ['File Name']
|
|
608
617
|
header.extend(all_keys)
|
|
609
|
-
|
|
618
|
+
|
|
610
619
|
import csv
|
|
611
620
|
with open(output_file,'w') as csvfile:
|
|
612
|
-
|
|
621
|
+
|
|
613
622
|
writer = csv.writer(csvfile)
|
|
614
|
-
|
|
623
|
+
|
|
615
624
|
# Write header
|
|
616
625
|
writer.writerow(header)
|
|
617
|
-
|
|
626
|
+
|
|
618
627
|
for im in results:
|
|
619
|
-
|
|
628
|
+
|
|
620
629
|
row = [im['file_name']]
|
|
621
630
|
kvp_this_image = {tag[1]:tag[2] for tag in im['exif_tags']}
|
|
622
|
-
|
|
631
|
+
|
|
623
632
|
for i_key,key in enumerate(all_keys):
|
|
624
633
|
value = ''
|
|
625
634
|
if key in kvp_this_image:
|
|
626
635
|
value = kvp_this_image[key]
|
|
627
|
-
row.append(value)
|
|
636
|
+
row.append(value)
|
|
628
637
|
# ...for each key that *might* be present in this image
|
|
629
|
-
|
|
638
|
+
|
|
630
639
|
assert len(row) == len(header)
|
|
631
|
-
|
|
640
|
+
|
|
632
641
|
writer.writerow(row)
|
|
633
|
-
|
|
642
|
+
|
|
634
643
|
# ...for each image
|
|
635
|
-
|
|
644
|
+
|
|
636
645
|
# ...with open()
|
|
637
|
-
|
|
646
|
+
|
|
638
647
|
else:
|
|
639
|
-
|
|
648
|
+
|
|
640
649
|
raise ValueError('Could not determine output type from file {}'.format(
|
|
641
650
|
output_file))
|
|
642
|
-
|
|
651
|
+
|
|
643
652
|
# ...if we're writing to .json/.csv
|
|
644
|
-
|
|
653
|
+
|
|
645
654
|
print('Wrote results to {}'.format(output_file))
|
|
646
655
|
|
|
647
656
|
# ..._write_exif_results(...)
|
|
@@ -650,7 +659,7 @@ def _write_exif_results(results,output_file):
|
|
|
650
659
|
def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=None,recursive=True):
|
|
651
660
|
"""
|
|
652
661
|
Read EXIF data for a folder of images.
|
|
653
|
-
|
|
662
|
+
|
|
654
663
|
Args:
|
|
655
664
|
input_folder (str): folder to process; if this is None, [filenames] should be a list of absolute
|
|
656
665
|
paths
|
|
@@ -662,22 +671,22 @@ def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=N
|
|
|
662
671
|
recursive (bool, optional): whether to recurse into [input_folder], not relevant if [input_folder]
|
|
663
672
|
is None.
|
|
664
673
|
verbose (bool, optional): enable additional debug output
|
|
665
|
-
|
|
674
|
+
|
|
666
675
|
Returns:
|
|
667
676
|
list: list of dicts, each of which contains EXIF information for one image. Fields include at least:
|
|
668
677
|
* 'file_name': the relative path to the image
|
|
669
678
|
* 'exif_tags': a dict of EXIF tags whose exact format depends on [options.processing_library].
|
|
670
679
|
"""
|
|
671
|
-
|
|
680
|
+
|
|
672
681
|
if options is None:
|
|
673
682
|
options = ReadExifOptions()
|
|
674
|
-
|
|
683
|
+
|
|
675
684
|
# Validate options
|
|
676
685
|
if options.tags_to_include is not None:
|
|
677
686
|
assert options.tags_to_exclude is None, "tags_to_include and tags_to_exclude are incompatible"
|
|
678
687
|
if options.tags_to_exclude is not None:
|
|
679
|
-
assert options.tags_to_include is None, "tags_to_include and tags_to_exclude are incompatible"
|
|
680
|
-
|
|
688
|
+
assert options.tags_to_include is None, "tags_to_include and tags_to_exclude are incompatible"
|
|
689
|
+
|
|
681
690
|
if input_folder is None:
|
|
682
691
|
input_folder = ''
|
|
683
692
|
if len(input_folder) > 0:
|
|
@@ -686,12 +695,12 @@ def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=N
|
|
|
686
695
|
|
|
687
696
|
assert (len(input_folder) > 0) or (filenames is not None), \
|
|
688
697
|
'Must specify either a folder or a list of files'
|
|
689
|
-
|
|
690
|
-
if output_file is not None:
|
|
691
|
-
|
|
698
|
+
|
|
699
|
+
if output_file is not None:
|
|
700
|
+
|
|
692
701
|
assert output_file.lower().endswith('.json') or output_file.lower().endswith('.csv'), \
|
|
693
702
|
'I only know how to write results to .json or .csv'
|
|
694
|
-
|
|
703
|
+
|
|
695
704
|
try:
|
|
696
705
|
with open(output_file, 'a') as f:
|
|
697
706
|
if not f.writable():
|
|
@@ -699,7 +708,7 @@ def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=N
|
|
|
699
708
|
except Exception:
|
|
700
709
|
print('Could not write to file {}'.format(output_file))
|
|
701
710
|
raise
|
|
702
|
-
|
|
711
|
+
|
|
703
712
|
if options.processing_library == 'exif':
|
|
704
713
|
assert is_executable(options.exiftool_command_name), 'exiftool not available'
|
|
705
714
|
|
|
@@ -708,9 +717,9 @@ def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=N
|
|
|
708
717
|
else:
|
|
709
718
|
assert isinstance(filenames,list)
|
|
710
719
|
images = _create_image_objects(filenames)
|
|
711
|
-
|
|
720
|
+
|
|
712
721
|
results = _populate_exif_for_images(input_folder,images,options)
|
|
713
|
-
|
|
722
|
+
|
|
714
723
|
if output_file is not None:
|
|
715
724
|
try:
|
|
716
725
|
_write_exif_results(results,output_file)
|
|
@@ -718,8 +727,8 @@ def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=N
|
|
|
718
727
|
if not options.allow_write_error:
|
|
719
728
|
raise
|
|
720
729
|
else:
|
|
721
|
-
print('Warning: error serializing EXIF data: {}'.format(str(e)))
|
|
722
|
-
|
|
730
|
+
print('Warning: error serializing EXIF data: {}'.format(str(e)))
|
|
731
|
+
|
|
723
732
|
return results
|
|
724
733
|
|
|
725
734
|
# ...read_exif_from_folder(...)
|
|
@@ -728,54 +737,54 @@ def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=N
|
|
|
728
737
|
def exif_results_to_cct(exif_results,cct_output_file=None,options=None):
|
|
729
738
|
"""
|
|
730
739
|
Given the EXIF results for a folder of images read via read_exif_from_folder,
|
|
731
|
-
create a COCO Camera Traps .json file that has no annotations, but
|
|
740
|
+
create a COCO Camera Traps .json file that has no annotations, but
|
|
732
741
|
attaches image filenames to locations and datetimes.
|
|
733
|
-
|
|
742
|
+
|
|
734
743
|
Args:
|
|
735
744
|
exif_results (str or list): the filename (or loaded list) containing the results
|
|
736
745
|
from read_exif_from_folder
|
|
737
|
-
cct_output_file (str,optional): the filename to which we should write
|
|
746
|
+
cct_output_file (str,optional): the filename to which we should write
|
|
738
747
|
COCO-Camera-Traps-formatted data
|
|
739
748
|
options (ExifResultsToCCTOptions, optional): options guiding the generation
|
|
740
749
|
of the CCT file, particularly location mapping
|
|
741
|
-
|
|
750
|
+
|
|
742
751
|
Returns:
|
|
743
752
|
dict: a COCO Camera Traps dict (with no annotations).
|
|
744
753
|
"""
|
|
745
|
-
|
|
754
|
+
|
|
746
755
|
if options is None:
|
|
747
756
|
options = ExifResultsToCCTOptions()
|
|
748
|
-
|
|
757
|
+
|
|
749
758
|
if isinstance(exif_results,str):
|
|
750
759
|
print('Reading EXIF results from {}'.format(exif_results))
|
|
751
760
|
with open(exif_results,'r') as f:
|
|
752
761
|
exif_results = json.load(f)
|
|
753
762
|
else:
|
|
754
763
|
assert isinstance(exif_results,list)
|
|
755
|
-
|
|
764
|
+
|
|
756
765
|
now = datetime.now()
|
|
757
766
|
|
|
758
767
|
image_info = []
|
|
759
768
|
|
|
760
769
|
images_without_datetime = []
|
|
761
770
|
images_with_invalid_datetime = []
|
|
762
|
-
|
|
771
|
+
|
|
763
772
|
# exif_result = exif_results[0]
|
|
764
773
|
for exif_result in tqdm(exif_results):
|
|
765
|
-
|
|
774
|
+
|
|
766
775
|
im = {}
|
|
767
|
-
|
|
776
|
+
|
|
768
777
|
# By default we assume that each leaf-node folder is a location
|
|
769
778
|
if options.filename_to_location_function is None:
|
|
770
779
|
im['location'] = 'unknown'
|
|
771
780
|
else:
|
|
772
|
-
im['location'] = options.filename_to_location_function(exif_result['file_name'])
|
|
773
|
-
|
|
781
|
+
im['location'] = options.filename_to_location_function(exif_result['file_name'])
|
|
782
|
+
|
|
774
783
|
im['file_name'] = exif_result['file_name']
|
|
775
784
|
im['id'] = im['file_name']
|
|
776
|
-
|
|
785
|
+
|
|
777
786
|
if ('exif_tags' not in exif_result) or (exif_result['exif_tags'] is None) or \
|
|
778
|
-
(options.exif_datetime_tag not in exif_result['exif_tags']):
|
|
787
|
+
(options.exif_datetime_tag not in exif_result['exif_tags']):
|
|
779
788
|
exif_dt = None
|
|
780
789
|
else:
|
|
781
790
|
exif_dt = exif_result['exif_tags'][options.exif_datetime_tag]
|
|
@@ -785,26 +794,26 @@ def exif_results_to_cct(exif_results,cct_output_file=None,options=None):
|
|
|
785
794
|
images_without_datetime.append(im['file_name'])
|
|
786
795
|
else:
|
|
787
796
|
dt = exif_dt
|
|
788
|
-
|
|
797
|
+
|
|
789
798
|
# An image from the future (or within the last 24 hours) is invalid
|
|
790
799
|
if (now - dt).total_seconds() <= 1*24*60*60:
|
|
791
800
|
print('Warning: datetime for {} is {}'.format(
|
|
792
801
|
im['file_name'],dt))
|
|
793
|
-
im['datetime'] = None
|
|
802
|
+
im['datetime'] = None
|
|
794
803
|
images_with_invalid_datetime.append(im['file_name'])
|
|
795
|
-
|
|
804
|
+
|
|
796
805
|
# An image from before the dawn of time is also invalid
|
|
797
806
|
elif dt.year < options.min_valid_timestamp_year:
|
|
798
807
|
print('Warning: datetime for {} is {}'.format(
|
|
799
808
|
im['file_name'],dt))
|
|
800
809
|
im['datetime'] = None
|
|
801
810
|
images_with_invalid_datetime.append(im['file_name'])
|
|
802
|
-
|
|
811
|
+
|
|
803
812
|
else:
|
|
804
813
|
im['datetime'] = dt
|
|
805
814
|
|
|
806
815
|
image_info.append(im)
|
|
807
|
-
|
|
816
|
+
|
|
808
817
|
# ...for each exif image result
|
|
809
818
|
|
|
810
819
|
print('Parsed EXIF datetime information, unable to parse EXIF date from {} of {} images'.format(
|
|
@@ -815,21 +824,21 @@ def exif_results_to_cct(exif_results,cct_output_file=None,options=None):
|
|
|
815
824
|
d['images'] = image_info
|
|
816
825
|
d['annotations'] = []
|
|
817
826
|
d['categories'] = []
|
|
818
|
-
|
|
827
|
+
|
|
819
828
|
if cct_output_file is not None:
|
|
820
829
|
write_object_with_serialized_datetimes(d,cct_output_file)
|
|
821
|
-
|
|
830
|
+
|
|
822
831
|
return d
|
|
823
832
|
|
|
824
833
|
# ...exif_results_to_cct(...)
|
|
825
834
|
|
|
826
|
-
|
|
835
|
+
|
|
827
836
|
#%% Interactive driver
|
|
828
837
|
|
|
829
838
|
if False:
|
|
830
|
-
|
|
839
|
+
|
|
831
840
|
#%%
|
|
832
|
-
|
|
841
|
+
|
|
833
842
|
input_folder = r'C:\temp\md-name-testing'
|
|
834
843
|
output_file = None # r'C:\temp\md-name-testing\exif.json'
|
|
835
844
|
options = ReadExifOptions()
|
|
@@ -838,30 +847,28 @@ if False:
|
|
|
838
847
|
options.use_threads = False
|
|
839
848
|
options.processing_library = 'pil'
|
|
840
849
|
# options.processing_library = 'exiftool'
|
|
841
|
-
options.tags_to_include = ['DateTime','Model','Make','ExifImageWidth',
|
|
850
|
+
options.tags_to_include = ['DateTime','Model','Make','ExifImageWidth',
|
|
851
|
+
'ExifImageHeight','DateTime','DateTimeOriginal','Orientation']
|
|
842
852
|
# options.tags_to_exclude = ['MakerNote']
|
|
843
|
-
|
|
853
|
+
|
|
844
854
|
results = read_exif_from_folder(input_folder,output_file,options)
|
|
845
855
|
|
|
846
856
|
#%%
|
|
847
|
-
|
|
857
|
+
|
|
848
858
|
with open(output_file,'r') as f:
|
|
849
859
|
d = json.load(f)
|
|
850
|
-
|
|
851
860
|
|
|
852
|
-
#%% Command-line driver
|
|
853
861
|
|
|
854
|
-
|
|
855
|
-
import sys
|
|
862
|
+
#%% Command-line driver
|
|
856
863
|
|
|
857
|
-
def main():
|
|
864
|
+
def main(): # noqa
|
|
858
865
|
|
|
859
866
|
options = ReadExifOptions()
|
|
860
|
-
|
|
867
|
+
|
|
861
868
|
parser = argparse.ArgumentParser(description=('Read EXIF information from all images in' + \
|
|
862
869
|
' a folder, and write the results to .csv or .json'))
|
|
863
870
|
|
|
864
|
-
parser.add_argument('input_folder', type=str,
|
|
871
|
+
parser.add_argument('input_folder', type=str,
|
|
865
872
|
help='Folder of images from which we should read EXIF information')
|
|
866
873
|
parser.add_argument('output_file', type=str,
|
|
867
874
|
help='Output file (.json) to which we should write EXIF information')
|
|
@@ -871,16 +878,16 @@ def main():
|
|
|
871
878
|
help='Use threads (instead of processes) for multitasking')
|
|
872
879
|
parser.add_argument('--processing_library', type=str, default=options.processing_library,
|
|
873
880
|
help='Processing library (exif or pil)')
|
|
874
|
-
|
|
881
|
+
|
|
875
882
|
if len(sys.argv[1:]) == 0:
|
|
876
883
|
parser.print_help()
|
|
877
884
|
parser.exit()
|
|
878
885
|
|
|
879
|
-
args = parser.parse_args()
|
|
886
|
+
args = parser.parse_args()
|
|
880
887
|
args_to_object(args, options)
|
|
881
888
|
options.processing_library = options.processing_library.lower()
|
|
882
|
-
|
|
889
|
+
|
|
883
890
|
read_exif_from_folder(args.input_folder,args.output_file,options)
|
|
884
|
-
|
|
891
|
+
|
|
885
892
|
if __name__ == '__main__':
|
|
886
893
|
main()
|