megadetector 5.0.28__py3-none-any.whl → 10.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of megadetector might be problematic. Click here for more details.
- megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
- megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
- megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
- megadetector/classification/aggregate_classifier_probs.py +3 -3
- megadetector/classification/analyze_failed_images.py +5 -5
- megadetector/classification/cache_batchapi_outputs.py +5 -5
- megadetector/classification/create_classification_dataset.py +11 -12
- megadetector/classification/crop_detections.py +10 -10
- megadetector/classification/csv_to_json.py +8 -8
- megadetector/classification/detect_and_crop.py +13 -15
- megadetector/classification/efficientnet/model.py +8 -8
- megadetector/classification/efficientnet/utils.py +6 -5
- megadetector/classification/evaluate_model.py +7 -7
- megadetector/classification/identify_mislabeled_candidates.py +6 -6
- megadetector/classification/json_to_azcopy_list.py +1 -1
- megadetector/classification/json_validator.py +29 -32
- megadetector/classification/map_classification_categories.py +9 -9
- megadetector/classification/merge_classification_detection_output.py +12 -9
- megadetector/classification/prepare_classification_script.py +19 -19
- megadetector/classification/prepare_classification_script_mc.py +26 -26
- megadetector/classification/run_classifier.py +4 -4
- megadetector/classification/save_mislabeled.py +6 -6
- megadetector/classification/train_classifier.py +1 -1
- megadetector/classification/train_classifier_tf.py +9 -9
- megadetector/classification/train_utils.py +10 -10
- megadetector/data_management/annotations/annotation_constants.py +1 -2
- megadetector/data_management/camtrap_dp_to_coco.py +79 -46
- megadetector/data_management/cct_json_utils.py +103 -103
- megadetector/data_management/cct_to_md.py +49 -49
- megadetector/data_management/cct_to_wi.py +33 -33
- megadetector/data_management/coco_to_labelme.py +75 -75
- megadetector/data_management/coco_to_yolo.py +210 -193
- megadetector/data_management/databases/add_width_and_height_to_db.py +86 -12
- megadetector/data_management/databases/combine_coco_camera_traps_files.py +40 -40
- megadetector/data_management/databases/integrity_check_json_db.py +228 -200
- megadetector/data_management/databases/subset_json_db.py +33 -33
- megadetector/data_management/generate_crops_from_cct.py +88 -39
- megadetector/data_management/get_image_sizes.py +54 -49
- megadetector/data_management/labelme_to_coco.py +133 -125
- megadetector/data_management/labelme_to_yolo.py +159 -73
- megadetector/data_management/lila/create_lila_blank_set.py +81 -83
- megadetector/data_management/lila/create_lila_test_set.py +32 -31
- megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
- megadetector/data_management/lila/download_lila_subset.py +21 -24
- megadetector/data_management/lila/generate_lila_per_image_labels.py +365 -107
- megadetector/data_management/lila/get_lila_annotation_counts.py +35 -33
- megadetector/data_management/lila/get_lila_image_counts.py +22 -22
- megadetector/data_management/lila/lila_common.py +73 -70
- megadetector/data_management/lila/test_lila_metadata_urls.py +28 -19
- megadetector/data_management/mewc_to_md.py +344 -340
- megadetector/data_management/ocr_tools.py +262 -255
- megadetector/data_management/read_exif.py +249 -227
- megadetector/data_management/remap_coco_categories.py +90 -28
- megadetector/data_management/remove_exif.py +81 -21
- megadetector/data_management/rename_images.py +187 -187
- megadetector/data_management/resize_coco_dataset.py +588 -120
- megadetector/data_management/speciesnet_to_md.py +41 -41
- megadetector/data_management/wi_download_csv_to_coco.py +55 -55
- megadetector/data_management/yolo_output_to_md_output.py +248 -122
- megadetector/data_management/yolo_to_coco.py +333 -191
- megadetector/detection/change_detection.py +832 -0
- megadetector/detection/process_video.py +340 -337
- megadetector/detection/pytorch_detector.py +358 -278
- megadetector/detection/run_detector.py +399 -186
- megadetector/detection/run_detector_batch.py +404 -377
- megadetector/detection/run_inference_with_yolov5_val.py +340 -327
- megadetector/detection/run_tiled_inference.py +257 -249
- megadetector/detection/tf_detector.py +24 -24
- megadetector/detection/video_utils.py +332 -295
- megadetector/postprocessing/add_max_conf.py +19 -11
- megadetector/postprocessing/categorize_detections_by_size.py +45 -45
- megadetector/postprocessing/classification_postprocessing.py +468 -433
- megadetector/postprocessing/combine_batch_outputs.py +23 -23
- megadetector/postprocessing/compare_batch_results.py +590 -525
- megadetector/postprocessing/convert_output_format.py +106 -102
- megadetector/postprocessing/create_crop_folder.py +347 -147
- megadetector/postprocessing/detector_calibration.py +173 -168
- megadetector/postprocessing/generate_csv_report.py +508 -499
- megadetector/postprocessing/load_api_results.py +48 -27
- megadetector/postprocessing/md_to_coco.py +133 -102
- megadetector/postprocessing/md_to_labelme.py +107 -90
- megadetector/postprocessing/md_to_wi.py +40 -40
- megadetector/postprocessing/merge_detections.py +92 -114
- megadetector/postprocessing/postprocess_batch_results.py +319 -301
- megadetector/postprocessing/remap_detection_categories.py +91 -38
- megadetector/postprocessing/render_detection_confusion_matrix.py +214 -205
- megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
- megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +704 -679
- megadetector/postprocessing/separate_detections_into_folders.py +226 -211
- megadetector/postprocessing/subset_json_detector_output.py +265 -262
- megadetector/postprocessing/top_folders_to_bottom.py +45 -45
- megadetector/postprocessing/validate_batch_results.py +70 -70
- megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +18 -19
- megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +54 -33
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +67 -67
- megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
- megadetector/taxonomy_mapping/simple_image_download.py +8 -8
- megadetector/taxonomy_mapping/species_lookup.py +156 -74
- megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
- megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
- megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
- megadetector/utils/ct_utils.py +1049 -211
- megadetector/utils/directory_listing.py +21 -77
- megadetector/utils/gpu_test.py +22 -22
- megadetector/utils/md_tests.py +632 -529
- megadetector/utils/path_utils.py +1520 -431
- megadetector/utils/process_utils.py +41 -41
- megadetector/utils/split_locations_into_train_val.py +62 -62
- megadetector/utils/string_utils.py +148 -27
- megadetector/utils/url_utils.py +489 -176
- megadetector/utils/wi_utils.py +2658 -2526
- megadetector/utils/write_html_image_list.py +137 -137
- megadetector/visualization/plot_utils.py +34 -30
- megadetector/visualization/render_images_with_thumbnails.py +39 -74
- megadetector/visualization/visualization_utils.py +487 -435
- megadetector/visualization/visualize_db.py +232 -198
- megadetector/visualization/visualize_detector_output.py +82 -76
- {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/METADATA +5 -2
- megadetector-10.0.0.dist-info/RECORD +139 -0
- {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/WHEEL +1 -1
- megadetector/api/batch_processing/api_core/__init__.py +0 -0
- megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
- megadetector/api/batch_processing/api_core/batch_service/score.py +0 -439
- megadetector/api/batch_processing/api_core/server.py +0 -294
- megadetector/api/batch_processing/api_core/server_api_config.py +0 -97
- megadetector/api/batch_processing/api_core/server_app_config.py +0 -55
- megadetector/api/batch_processing/api_core/server_batch_job_manager.py +0 -220
- megadetector/api/batch_processing/api_core/server_job_status_table.py +0 -149
- megadetector/api/batch_processing/api_core/server_orchestration.py +0 -360
- megadetector/api/batch_processing/api_core/server_utils.py +0 -88
- megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
- megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
- megadetector/api/batch_processing/api_support/__init__.py +0 -0
- megadetector/api/batch_processing/api_support/summarize_daily_activity.py +0 -152
- megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
- megadetector/api/synchronous/__init__.py +0 -0
- megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
- megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +0 -151
- megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -263
- megadetector/api/synchronous/api_core/animal_detection_api/config.py +0 -35
- megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
- megadetector/api/synchronous/api_core/tests/load_test.py +0 -110
- megadetector/data_management/importers/add_nacti_sizes.py +0 -52
- megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
- megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
- megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
- megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
- megadetector/data_management/importers/awc_to_json.py +0 -191
- megadetector/data_management/importers/bellevue_to_json.py +0 -272
- megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
- megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
- megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
- megadetector/data_management/importers/cct_field_adjustments.py +0 -58
- megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
- megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
- megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
- megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
- megadetector/data_management/importers/ena24_to_json.py +0 -276
- megadetector/data_management/importers/filenames_to_json.py +0 -386
- megadetector/data_management/importers/helena_to_cct.py +0 -283
- megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
- megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
- megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
- megadetector/data_management/importers/jb_csv_to_json.py +0 -150
- megadetector/data_management/importers/mcgill_to_json.py +0 -250
- megadetector/data_management/importers/missouri_to_json.py +0 -490
- megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
- megadetector/data_management/importers/noaa_seals_2019.py +0 -181
- megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
- megadetector/data_management/importers/pc_to_json.py +0 -365
- megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
- megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
- megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
- megadetector/data_management/importers/rspb_to_json.py +0 -356
- megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
- megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
- megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
- megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
- megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
- megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
- megadetector/data_management/importers/sulross_get_exif.py +0 -65
- megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
- megadetector/data_management/importers/ubc_to_json.py +0 -399
- megadetector/data_management/importers/umn_to_json.py +0 -507
- megadetector/data_management/importers/wellington_to_json.py +0 -263
- megadetector/data_management/importers/wi_to_json.py +0 -442
- megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
- megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
- megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
- megadetector/utils/azure_utils.py +0 -178
- megadetector/utils/sas_blob_utils.py +0 -509
- megadetector-5.0.28.dist-info/RECORD +0 -209
- /megadetector/{api/batch_processing/__init__.py → __init__.py} +0 -0
- {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/licenses/LICENSE +0 -0
- {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/top_level.txt +0 -0
|
@@ -2,8 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
read_exif.py
|
|
4
4
|
|
|
5
|
-
Given a folder of images, reads relevant metadata (EXIF/IPTC/XMP) fields from all images,
|
|
6
|
-
and writes them to a .json or .csv file.
|
|
5
|
+
Given a folder of images, reads relevant metadata (EXIF/IPTC/XMP) fields from all images,
|
|
6
|
+
and writes them to a .json or .csv file.
|
|
7
7
|
|
|
8
8
|
This module can use either PIL (which can only reliably read EXIF data) or exiftool (which
|
|
9
9
|
can read everything). The latter approach expects that exiftool is available on the system
|
|
@@ -16,6 +16,9 @@ path. No attempt is made to be consistent in format across the two approaches.
|
|
|
16
16
|
import os
|
|
17
17
|
import subprocess
|
|
18
18
|
import json
|
|
19
|
+
import argparse
|
|
20
|
+
import sys
|
|
21
|
+
|
|
19
22
|
from datetime import datetime
|
|
20
23
|
|
|
21
24
|
from multiprocessing.pool import ThreadPool as ThreadPool
|
|
@@ -23,6 +26,7 @@ from multiprocessing.pool import Pool as Pool
|
|
|
23
26
|
|
|
24
27
|
from tqdm import tqdm
|
|
25
28
|
from PIL import Image, ExifTags
|
|
29
|
+
from functools import partial
|
|
26
30
|
|
|
27
31
|
from megadetector.utils.path_utils import find_images, is_executable
|
|
28
32
|
from megadetector.utils.ct_utils import args_to_object
|
|
@@ -31,7 +35,9 @@ from megadetector.data_management.cct_json_utils import write_object_with_serial
|
|
|
31
35
|
|
|
32
36
|
debug_max_images = None
|
|
33
37
|
|
|
34
|
-
minimal_exif_tags =
|
|
38
|
+
minimal_exif_tags = \
|
|
39
|
+
['DateTime','Model','Make','ExifImageWidth','ExifImageHeight',
|
|
40
|
+
'DateTimeOriginal','Orientation', 'GPSInfo']
|
|
35
41
|
|
|
36
42
|
|
|
37
43
|
#%% Options
|
|
@@ -40,85 +46,84 @@ class ReadExifOptions:
|
|
|
40
46
|
"""
|
|
41
47
|
Parameters controlling metadata extraction.
|
|
42
48
|
"""
|
|
43
|
-
|
|
49
|
+
|
|
44
50
|
def __init__(self):
|
|
45
|
-
|
|
51
|
+
|
|
46
52
|
#: Enable additional debug console output
|
|
47
53
|
self.verbose = False
|
|
48
|
-
|
|
54
|
+
|
|
49
55
|
#: If this is True and an output file is specified for read_exif_from_folder,
|
|
50
56
|
#: and we encounter a serialization issue, we'll return the results but won't
|
|
51
|
-
#: error.
|
|
57
|
+
#: error.
|
|
52
58
|
self.allow_write_error = False
|
|
53
|
-
|
|
59
|
+
|
|
54
60
|
#: Number of concurrent workers, set to <= 1 to disable parallelization
|
|
55
61
|
self.n_workers = 1
|
|
56
|
-
|
|
62
|
+
|
|
57
63
|
#: Should we use threads (vs. processes) for parallelization?
|
|
58
64
|
#:
|
|
59
65
|
#: Not relevant if n_workers is <= 1.
|
|
60
66
|
self.use_threads = True
|
|
61
|
-
|
|
62
|
-
#: "File" and "ExifTool" are tag types used by ExifTool to report data that
|
|
67
|
+
|
|
68
|
+
#: "File" and "ExifTool" are tag types used by ExifTool to report data that
|
|
63
69
|
#: doesn't come from EXIF, rather from the file (e.g. file size).
|
|
64
70
|
self.tag_types_to_ignore = set(['File','ExifTool'])
|
|
65
|
-
|
|
71
|
+
|
|
66
72
|
#: Include/exclude specific tags (tags_to_include and tags_to_exclude are mutually incompatible)
|
|
67
73
|
#:
|
|
68
74
|
#: A useful set of tags one might want to limit queries for:
|
|
69
75
|
#:
|
|
70
|
-
#: options.tags_to_include =
|
|
71
|
-
#: 'DateTimeOriginal','Orientation']
|
|
76
|
+
#: options.tags_to_include = minimal_exif_tags
|
|
72
77
|
self.tags_to_include = None
|
|
73
|
-
|
|
78
|
+
|
|
74
79
|
#: Include/exclude specific tags (tags_to_include and tags_to_exclude are mutually incompatible)
|
|
75
80
|
self.tags_to_exclude = None
|
|
76
|
-
|
|
81
|
+
|
|
77
82
|
#: The command line to invoke if using exiftool, can be an absolute path to exiftool.exe, or
|
|
78
83
|
#: can be just "exiftool", in which case it should be on your system path.
|
|
79
84
|
self.exiftool_command_name = 'exiftool'
|
|
80
|
-
|
|
85
|
+
|
|
81
86
|
#: How should we handle byte-formatted EXIF tags?
|
|
82
87
|
#:
|
|
83
88
|
#: 'convert_to_string': convert to a Python string
|
|
84
89
|
#: 'delete': don't include at all
|
|
85
90
|
#: 'raw': include as a byte string
|
|
86
91
|
self.byte_handling = 'convert_to_string' # 'convert_to_string','delete','raw'
|
|
87
|
-
|
|
92
|
+
|
|
88
93
|
#: Should we use exiftool or PIL?
|
|
89
94
|
self.processing_library = 'pil' # 'exiftool','pil'
|
|
90
|
-
|
|
95
|
+
|
|
91
96
|
|
|
92
97
|
class ExifResultsToCCTOptions:
|
|
93
98
|
"""
|
|
94
|
-
Options controlling the behavior of exif_results_to_cct() (which reformats the datetime
|
|
99
|
+
Options controlling the behavior of exif_results_to_cct() (which reformats the datetime
|
|
95
100
|
information) extracted by read_exif_from_folder().
|
|
96
101
|
"""
|
|
97
|
-
|
|
102
|
+
|
|
98
103
|
def __init__(self):
|
|
99
|
-
|
|
104
|
+
|
|
100
105
|
#: Timestamps older than this are assumed to be junk; lots of cameras use a
|
|
101
106
|
#: default time in 2000.
|
|
102
107
|
self.min_valid_timestamp_year = 2001
|
|
103
|
-
|
|
108
|
+
|
|
104
109
|
#: The EXIF tag from which to pull datetime information
|
|
105
110
|
self.exif_datetime_tag = 'DateTimeOriginal'
|
|
106
|
-
|
|
111
|
+
|
|
107
112
|
#: Function for extracting location information, should take a string
|
|
108
113
|
#: and return a string. Defaults to ct_utils.image_file_to_camera_folder. If
|
|
109
114
|
#: this is None, location is written as "unknown".
|
|
110
115
|
self.filename_to_location_function = image_file_to_camera_folder
|
|
111
|
-
|
|
116
|
+
|
|
112
117
|
|
|
113
118
|
#%% Functions
|
|
114
119
|
|
|
115
120
|
def _get_exif_ifd(exif):
|
|
116
121
|
"""
|
|
117
122
|
Read EXIF data by finding the EXIF offset and reading tags directly
|
|
118
|
-
|
|
123
|
+
|
|
119
124
|
https://github.com/python-pillow/Pillow/issues/5863
|
|
120
125
|
"""
|
|
121
|
-
|
|
126
|
+
|
|
122
127
|
# Find the offset for all the EXIF information
|
|
123
128
|
for key, value in ExifTags.TAGS.items():
|
|
124
129
|
if value == "ExifOffset":
|
|
@@ -132,19 +137,19 @@ def _get_exif_ifd(exif):
|
|
|
132
137
|
|
|
133
138
|
def has_gps_info(im):
|
|
134
139
|
"""
|
|
135
|
-
Given a filename, PIL image, dict of EXIF tags, or dict containing an 'exif_tags' field,
|
|
140
|
+
Given a filename, PIL image, dict of EXIF tags, or dict containing an 'exif_tags' field,
|
|
136
141
|
determine whether GPS location information is present in this image. Does not retrieve
|
|
137
142
|
location info, currently only used to determine whether it's present.
|
|
138
|
-
|
|
143
|
+
|
|
139
144
|
Args:
|
|
140
145
|
im (str, PIL.Image.Image, dict): image for which we should determine GPS metadata
|
|
141
146
|
presence
|
|
142
|
-
|
|
147
|
+
|
|
143
148
|
Returns:
|
|
144
149
|
bool: whether GPS metadata is present, or None if we failed to read EXIF data from
|
|
145
150
|
a file.
|
|
146
151
|
"""
|
|
147
|
-
|
|
152
|
+
|
|
148
153
|
if isinstance(im,str) or isinstance(im,Image.Image):
|
|
149
154
|
exif_tags = read_pil_exif(im)
|
|
150
155
|
if exif_tags is None:
|
|
@@ -153,57 +158,58 @@ def has_gps_info(im):
|
|
|
153
158
|
else:
|
|
154
159
|
assert isinstance(im,dict)
|
|
155
160
|
exif_tags = im
|
|
156
|
-
|
|
161
|
+
|
|
157
162
|
if 'exif_tags' in exif_tags:
|
|
158
163
|
exif_tags = exif_tags['exif_tags']
|
|
159
164
|
if exif_tags is None:
|
|
160
165
|
return None
|
|
161
|
-
|
|
166
|
+
|
|
162
167
|
if 'GPSInfo' in exif_tags and \
|
|
163
168
|
exif_tags['GPSInfo'] is not None and \
|
|
164
169
|
isinstance(exif_tags['GPSInfo'],dict):
|
|
165
|
-
|
|
170
|
+
|
|
166
171
|
# Don't indicate that GPS data is present if only GPS version info is present
|
|
167
172
|
if ('GPSLongitude' in exif_tags['GPSInfo']) or ('GPSLatitude' in exif_tags['GPSInfo']):
|
|
168
173
|
return True
|
|
169
174
|
return False
|
|
170
|
-
|
|
175
|
+
|
|
171
176
|
return False
|
|
172
|
-
|
|
173
|
-
# ...def has_gps_info(...)
|
|
177
|
+
|
|
178
|
+
# ...def has_gps_info(...)
|
|
174
179
|
|
|
175
180
|
|
|
176
181
|
def read_pil_exif(im,options=None):
|
|
177
182
|
"""
|
|
178
183
|
Read all the EXIF data we know how to read from an image, using PIL. This is primarily
|
|
179
|
-
an internal function; the main entry point for single-image EXIF information is
|
|
184
|
+
an internal function; the main entry point for single-image EXIF information is
|
|
180
185
|
read_exif_tags_for_image().
|
|
181
|
-
|
|
186
|
+
|
|
182
187
|
Args:
|
|
183
|
-
im (str or PIL.Image.Image): image (as a filename or an Image object) from which
|
|
188
|
+
im (str or PIL.Image.Image): image (as a filename or an Image object) from which
|
|
184
189
|
we should read EXIF data.
|
|
185
|
-
|
|
190
|
+
options (ReadExifOptions, optional): see ReadExifOptions
|
|
191
|
+
|
|
186
192
|
Returns:
|
|
187
193
|
dict: a dictionary mapping EXIF tag names to their values
|
|
188
194
|
"""
|
|
189
|
-
|
|
195
|
+
|
|
190
196
|
if options is None:
|
|
191
197
|
options = ReadExifOptions()
|
|
192
|
-
|
|
198
|
+
|
|
193
199
|
image_name = '[image]'
|
|
194
200
|
if isinstance(im,str):
|
|
195
201
|
image_name = im
|
|
196
202
|
im = Image.open(im)
|
|
197
|
-
|
|
203
|
+
|
|
198
204
|
exif_tags = {}
|
|
199
205
|
try:
|
|
200
206
|
exif_info = im.getexif()
|
|
201
207
|
except Exception:
|
|
202
208
|
exif_info = None
|
|
203
|
-
|
|
209
|
+
|
|
204
210
|
if exif_info is None:
|
|
205
211
|
return exif_tags
|
|
206
|
-
|
|
212
|
+
|
|
207
213
|
for k, v in exif_info.items():
|
|
208
214
|
assert isinstance(k,str) or isinstance(k,int), \
|
|
209
215
|
'Invalid EXIF key {}'.format(str(k))
|
|
@@ -212,9 +218,9 @@ def read_pil_exif(im,options=None):
|
|
|
212
218
|
else:
|
|
213
219
|
# print('Warning: unrecognized EXIF tag: {}'.format(k))
|
|
214
220
|
exif_tags[k] = str(v)
|
|
215
|
-
|
|
221
|
+
|
|
216
222
|
exif_ifd_tags = _get_exif_ifd(exif_info)
|
|
217
|
-
|
|
223
|
+
|
|
218
224
|
for k in exif_ifd_tags.keys():
|
|
219
225
|
v = exif_ifd_tags[k]
|
|
220
226
|
if k in exif_tags:
|
|
@@ -223,16 +229,16 @@ def read_pil_exif(im,options=None):
|
|
|
223
229
|
k,image_name,exif_tags[k],v))
|
|
224
230
|
else:
|
|
225
231
|
exif_tags[k] = v
|
|
226
|
-
|
|
232
|
+
|
|
227
233
|
exif_tag_names = list(exif_tags.keys())
|
|
228
|
-
|
|
234
|
+
|
|
229
235
|
# Type conversion and cleanup
|
|
230
|
-
#
|
|
236
|
+
#
|
|
231
237
|
# Most quirky types will get serialized to string when we write to .json.
|
|
232
238
|
for k in exif_tag_names:
|
|
233
|
-
|
|
239
|
+
|
|
234
240
|
if isinstance(exif_tags[k],bytes):
|
|
235
|
-
|
|
241
|
+
|
|
236
242
|
if options.byte_handling == 'delete':
|
|
237
243
|
del exif_tags[k]
|
|
238
244
|
elif options.byte_handling == 'raw':
|
|
@@ -240,24 +246,24 @@ def read_pil_exif(im,options=None):
|
|
|
240
246
|
else:
|
|
241
247
|
assert options.byte_handling == 'convert_to_string'
|
|
242
248
|
exif_tags[k] = str(exif_tags[k])
|
|
243
|
-
|
|
249
|
+
|
|
244
250
|
elif isinstance(exif_tags[k],str):
|
|
245
|
-
|
|
251
|
+
|
|
246
252
|
exif_tags[k] = exif_tags[k].strip()
|
|
247
|
-
|
|
248
|
-
# Special case for GPS info... I could decode other encoded tags, but GPS info is
|
|
253
|
+
|
|
254
|
+
# Special case for GPS info... I could decode other encoded tags, but GPS info is
|
|
249
255
|
# particularly important, so I'm only doing that for now.
|
|
250
256
|
if 'GPSInfo' in exif_tags:
|
|
251
|
-
|
|
257
|
+
|
|
252
258
|
try:
|
|
253
|
-
|
|
259
|
+
|
|
254
260
|
# Find the tag number for GPS info, in practice should always be 34853
|
|
255
|
-
|
|
256
|
-
assert
|
|
257
|
-
|
|
261
|
+
gpsinfo_tag = next(tag for tag, name in ExifTags.TAGS.items() if name == "GPSInfo")
|
|
262
|
+
assert gpsinfo_tag == 34853
|
|
263
|
+
|
|
258
264
|
# These are integer keys, e.g. {7: (14.0, 27.0, 7.24)}
|
|
259
|
-
gps_info_raw = exif_info.get_ifd(
|
|
260
|
-
|
|
265
|
+
gps_info_raw = exif_info.get_ifd(gpsinfo_tag)
|
|
266
|
+
|
|
261
267
|
# Convert to strings, e.g. 'GPSTimeStamp'
|
|
262
268
|
gps_info = {}
|
|
263
269
|
for int_tag,v in enumerate(gps_info_raw.keys()):
|
|
@@ -266,15 +272,15 @@ def read_pil_exif(im,options=None):
|
|
|
266
272
|
gps_info[ExifTags.GPSTAGS[int_tag]] = v
|
|
267
273
|
else:
|
|
268
274
|
gps_info[int_tag] = v
|
|
269
|
-
|
|
275
|
+
|
|
270
276
|
exif_tags['GPSInfo'] = gps_info
|
|
271
|
-
|
|
277
|
+
|
|
272
278
|
except Exception as e:
|
|
273
279
|
if options.verbose:
|
|
274
280
|
print('Warning: error reading GPS info: {}'.format(str(e)))
|
|
275
|
-
|
|
281
|
+
|
|
276
282
|
# ...if we think there might be GPS tags in this image
|
|
277
|
-
|
|
283
|
+
|
|
278
284
|
return exif_tags
|
|
279
285
|
|
|
280
286
|
# ...read_pil_exif()
|
|
@@ -284,27 +290,33 @@ def format_datetime_as_exif_datetime_string(dt):
|
|
|
284
290
|
"""
|
|
285
291
|
Returns a Python datetime object rendered using the standard EXIF datetime
|
|
286
292
|
string format ('%Y:%m:%d %H:%M:%S')
|
|
293
|
+
|
|
294
|
+
Args:
|
|
295
|
+
dt (datetime): datetime object to format
|
|
296
|
+
|
|
297
|
+
Returns:
|
|
298
|
+
str: [dt] as a string in standard EXIF format
|
|
287
299
|
"""
|
|
288
|
-
|
|
300
|
+
|
|
289
301
|
return datetime.strftime(dt, '%Y:%m:%d %H:%M:%S')
|
|
290
|
-
|
|
302
|
+
|
|
291
303
|
|
|
292
304
|
def parse_exif_datetime_string(s,verbose=False):
|
|
293
305
|
""""
|
|
294
|
-
Exif datetimes are strings, but in a standard format:
|
|
295
|
-
|
|
306
|
+
Exif datetimes are strings, but in a standard format:
|
|
307
|
+
|
|
296
308
|
%Y:%m:%d %H:%M:%S
|
|
297
|
-
|
|
309
|
+
|
|
298
310
|
Parses one of those strings into a Python datetime object.
|
|
299
|
-
|
|
311
|
+
|
|
300
312
|
Args:
|
|
301
313
|
s (str): datetime string to parse, should be in standard EXIF datetime format
|
|
302
314
|
verbose (bool, optional): enable additional debug output
|
|
303
|
-
|
|
315
|
+
|
|
304
316
|
Returns:
|
|
305
317
|
datetime: the datetime object created from [s]
|
|
306
318
|
"""
|
|
307
|
-
|
|
319
|
+
|
|
308
320
|
dt = None
|
|
309
321
|
try:
|
|
310
322
|
dt = datetime.strptime(s, '%Y:%m:%d %H:%M:%S')
|
|
@@ -319,7 +331,7 @@ def _filter_tags(tags,options):
|
|
|
319
331
|
Internal function used to include/exclude specific tags from the exif_tags
|
|
320
332
|
dict.
|
|
321
333
|
"""
|
|
322
|
-
|
|
334
|
+
|
|
323
335
|
if options is None:
|
|
324
336
|
return tags
|
|
325
337
|
if options.tags_to_include is None and options.tags_to_exclude is None:
|
|
@@ -343,23 +355,27 @@ def _filter_tags(tags,options):
|
|
|
343
355
|
def read_exif_tags_for_image(file_path,options=None):
|
|
344
356
|
"""
|
|
345
357
|
Get relevant fields from EXIF data for an image
|
|
346
|
-
|
|
358
|
+
|
|
359
|
+
Args:
|
|
360
|
+
file_path (str): image from which we should read EXIF data
|
|
361
|
+
options (ReadExifOptions, optional): see ReadExifOptions
|
|
362
|
+
|
|
347
363
|
Returns:
|
|
348
|
-
dict: a dict with fields 'status' (str) and 'tags'. The exact format of 'tags' depends on
|
|
364
|
+
dict: a dict with fields 'status' (str) and 'tags'. The exact format of 'tags' depends on
|
|
349
365
|
options (ReadExifOptions, optional): parameters controlling metadata extraction
|
|
350
366
|
options.processing_library:
|
|
351
|
-
|
|
367
|
+
|
|
352
368
|
- For exiftool, 'tags' is a list of lists, where each element is (type/tag/value)
|
|
353
369
|
- For PIL, 'tags' is a dict (str:str)
|
|
354
370
|
"""
|
|
355
|
-
|
|
371
|
+
|
|
356
372
|
if options is None:
|
|
357
373
|
options = ReadExifOptions()
|
|
358
|
-
|
|
374
|
+
|
|
359
375
|
result = {'status':'unknown','tags':[]}
|
|
360
|
-
|
|
376
|
+
|
|
361
377
|
if options.processing_library == 'pil':
|
|
362
|
-
|
|
378
|
+
|
|
363
379
|
try:
|
|
364
380
|
exif_tags = read_pil_exif(file_path,options)
|
|
365
381
|
|
|
@@ -369,18 +385,18 @@ def read_exif_tags_for_image(file_path,options=None):
|
|
|
369
385
|
file_path,str(e)))
|
|
370
386
|
result['status'] = 'read_failure'
|
|
371
387
|
result['error'] = str(e)
|
|
372
|
-
|
|
388
|
+
|
|
373
389
|
if result['status'] == 'unknown':
|
|
374
|
-
if exif_tags is None:
|
|
390
|
+
if exif_tags is None:
|
|
375
391
|
result['status'] = 'empty_read'
|
|
376
392
|
else:
|
|
377
393
|
result['status'] = 'success'
|
|
378
394
|
result['tags'] = _filter_tags(exif_tags,options)
|
|
379
|
-
|
|
395
|
+
|
|
380
396
|
return result
|
|
381
|
-
|
|
397
|
+
|
|
382
398
|
elif options.processing_library == 'exiftool':
|
|
383
|
-
|
|
399
|
+
|
|
384
400
|
# -G means "Print group name for each tag", e.g. print:
|
|
385
401
|
#
|
|
386
402
|
# [File] Bits Per Sample : 8
|
|
@@ -390,95 +406,95 @@ def read_exif_tags_for_image(file_path,options=None):
|
|
|
390
406
|
# Bits Per Sample : 8
|
|
391
407
|
proc = subprocess.Popen([options.exiftool_command_name, '-G', file_path],
|
|
392
408
|
stdout=subprocess.PIPE, encoding='utf8')
|
|
393
|
-
|
|
394
|
-
exif_lines = proc.stdout.readlines()
|
|
409
|
+
|
|
410
|
+
exif_lines = proc.stdout.readlines()
|
|
395
411
|
exif_lines = [s.strip() for s in exif_lines]
|
|
396
412
|
if ( (exif_lines is None) or (len(exif_lines) == 0) or not \
|
|
397
413
|
any([s.lower().startswith('[exif]') for s in exif_lines])):
|
|
398
414
|
result['status'] = 'failure'
|
|
399
415
|
return result
|
|
400
|
-
|
|
416
|
+
|
|
401
417
|
# A list of three-element lists (type/tag/value)
|
|
402
418
|
exif_tags = []
|
|
403
|
-
|
|
419
|
+
|
|
404
420
|
# line_raw = exif_lines[0]
|
|
405
421
|
for line_raw in exif_lines:
|
|
406
|
-
|
|
422
|
+
|
|
407
423
|
# A typical line:
|
|
408
424
|
#
|
|
409
425
|
# [ExifTool] ExifTool Version Number : 12.13
|
|
410
|
-
|
|
426
|
+
|
|
411
427
|
line = line_raw.strip()
|
|
412
|
-
|
|
428
|
+
|
|
413
429
|
# Split on the first occurrence of ":"
|
|
414
430
|
tokens = line.split(':',1)
|
|
415
431
|
assert(len(tokens) == 2), 'EXIF tokenization failure ({} tokens, expected 2)'.format(
|
|
416
432
|
len(tokens))
|
|
417
|
-
|
|
418
|
-
field_value = tokens[1].strip()
|
|
419
|
-
|
|
420
|
-
field_name_type = tokens[0].strip()
|
|
433
|
+
|
|
434
|
+
field_value = tokens[1].strip()
|
|
435
|
+
|
|
436
|
+
field_name_type = tokens[0].strip()
|
|
421
437
|
field_name_type_tokens = field_name_type.split(None,1)
|
|
422
438
|
assert len(field_name_type_tokens) == 2, 'EXIF tokenization failure'
|
|
423
|
-
|
|
439
|
+
|
|
424
440
|
field_type = field_name_type_tokens[0].strip()
|
|
425
441
|
assert field_type.startswith('[') and field_type.endswith(']'), \
|
|
426
442
|
'Invalid EXIF field {}'.format(field_type)
|
|
427
443
|
field_type = field_type[1:-1]
|
|
428
|
-
|
|
444
|
+
|
|
429
445
|
if field_type in options.tag_types_to_ignore:
|
|
430
446
|
if options.verbose:
|
|
431
447
|
print('Ignoring tag with type {}'.format(field_type))
|
|
432
|
-
continue
|
|
433
|
-
|
|
448
|
+
continue
|
|
449
|
+
|
|
434
450
|
field_name = field_name_type_tokens[1].strip()
|
|
435
451
|
if options.tags_to_exclude is not None and field_name in options.tags_to_exclude:
|
|
436
452
|
continue
|
|
437
453
|
if options.tags_to_include is not None and field_name not in options.tags_to_include:
|
|
438
454
|
continue
|
|
439
455
|
tag = [field_type,field_name,field_value]
|
|
440
|
-
|
|
456
|
+
|
|
441
457
|
exif_tags.append(tag)
|
|
442
|
-
|
|
458
|
+
|
|
443
459
|
# ...for each output line
|
|
444
|
-
|
|
460
|
+
|
|
445
461
|
result['status'] = 'success'
|
|
446
462
|
result['tags'] = exif_tags
|
|
447
463
|
return result
|
|
448
|
-
|
|
464
|
+
|
|
449
465
|
else:
|
|
450
|
-
|
|
466
|
+
|
|
451
467
|
raise ValueError('Unknown processing library {}'.format(
|
|
452
468
|
options.processing_library))
|
|
453
469
|
|
|
454
470
|
# ...which processing library are we using?
|
|
455
|
-
|
|
471
|
+
|
|
456
472
|
# ...read_exif_tags_for_image()
|
|
457
473
|
|
|
458
474
|
|
|
459
475
|
def _populate_exif_data(im, image_base, options=None):
|
|
460
476
|
"""
|
|
461
477
|
Populate EXIF data into the 'exif_tags' field in the image object [im].
|
|
462
|
-
|
|
478
|
+
|
|
463
479
|
im['file_name'] should be prepopulated, relative to image_base.
|
|
464
|
-
|
|
480
|
+
|
|
465
481
|
Returns a modified version of [im], also modifies [im] in place.
|
|
466
482
|
"""
|
|
467
|
-
|
|
483
|
+
|
|
468
484
|
if options is None:
|
|
469
485
|
options = ReadExifOptions()
|
|
470
486
|
|
|
471
487
|
fn = im['file_name']
|
|
472
488
|
if options.verbose:
|
|
473
489
|
print('Processing {}'.format(fn))
|
|
474
|
-
|
|
490
|
+
|
|
475
491
|
try:
|
|
476
|
-
|
|
492
|
+
|
|
477
493
|
file_path = os.path.join(image_base,fn)
|
|
478
494
|
assert os.path.isfile(file_path), 'Could not find file {}'.format(file_path)
|
|
479
495
|
result = read_exif_tags_for_image(file_path,options)
|
|
480
496
|
if result['status'] == 'success':
|
|
481
|
-
exif_tags = result['tags']
|
|
497
|
+
exif_tags = result['tags']
|
|
482
498
|
im['exif_tags'] = exif_tags
|
|
483
499
|
else:
|
|
484
500
|
im['exif_tags'] = None
|
|
@@ -487,15 +503,15 @@ def _populate_exif_data(im, image_base, options=None):
|
|
|
487
503
|
im['error'] = result['error']
|
|
488
504
|
if options.verbose:
|
|
489
505
|
print('Error reading EXIF data for {}'.format(file_path))
|
|
490
|
-
|
|
506
|
+
|
|
491
507
|
except Exception as e:
|
|
492
|
-
|
|
508
|
+
|
|
493
509
|
s = 'Error on {}: {}'.format(fn,str(e))
|
|
494
510
|
print(s)
|
|
495
511
|
im['error'] = s
|
|
496
512
|
im['status'] = 'read failure'
|
|
497
513
|
im['exif_tags'] = None
|
|
498
|
-
|
|
514
|
+
|
|
499
515
|
return im
|
|
500
516
|
|
|
501
517
|
# ..._populate_exif_data()
|
|
@@ -503,67 +519,72 @@ def _populate_exif_data(im, image_base, options=None):
|
|
|
503
519
|
|
|
504
520
|
def _create_image_objects(image_files,recursive=True):
    """
    Build one skeleton image dict per image, each containing only the field
    'file_name'.

    [image_files] is either a folder name (a str), in which case the folder is
    enumerated via find_images() and relative paths are stored, or a list of
    relative paths, which are stored as-is (so the base path doesn't matter here).

    "recursive" only applies when [image_files] is a folder; it is ignored for lists.

    Returns a list of dicts with field 'file_name' (a relative path).
    """

    relative_paths = image_files

    # A string is treated as a folder to enumerate; anything else is iterated as-is
    if isinstance(relative_paths,str):
        folder = relative_paths
        print('Enumerating image files in {}'.format(folder))
        assert os.path.isdir(folder), 'Invalid image folder {}'.format(folder)
        relative_paths = find_images(folder,
                                     recursive=recursive,
                                     return_relative_paths=True,
                                     convert_slashes=True)

    image_objects = [{'file_name':relative_path} for relative_path in relative_paths]

    # No debugging cap requested (via the module-level [debug_max_images]): return everything
    if debug_max_images is None:
        return image_objects

    print('Trimming input list to {} images'.format(debug_max_images))
    return image_objects[0:debug_max_images]
|
|
535
551
|
|
|
536
552
|
|
|
537
553
|
def _populate_exif_for_images(image_base,images,options=None):
    """
    Main worker loop: read EXIF data for each image object in [images] by calling
    _populate_exif_data() on each one, either serially (options.n_workers == 1) or
    via a thread/process pool.

    Args:
        image_base (str): base folder, passed through to _populate_exif_data()
        images (list): list of dicts with the field 'file_name' containing
            a relative path (relative to 'image_base')
        options (ReadExifOptions, optional): controls n_workers and use_threads here,
            and is passed through to _populate_exif_data(); defaults are used if
            this is None

    Returns:
        list: the values returned by _populate_exif_data(), one per element of
        [images], in the same order as [images]
    """

    if options is None:
        options = ReadExifOptions()

    if options.n_workers == 1:

        results = [_populate_exif_data(im,image_base,options) for im in tqdm(images)]

    else:

        pool = None
        try:
            if options.use_threads:
                print('Starting parallel thread pool with {} workers'.format(options.n_workers))
                pool = ThreadPool(options.n_workers)
            else:
                print('Starting parallel process pool with {} workers'.format(options.n_workers))
                pool = Pool(options.n_workers)

            # imap() yields results in input order, so [results] lines up with [images]
            results = list(tqdm(pool.imap(partial(_populate_exif_data,image_base=image_base,
                                                  options=options),images),total=len(images)))
        finally:
            # Pool construction itself can fail, in which case [pool] is still None;
            # calling close() on it would raise AttributeError and mask the real error.
            if pool is not None:
                pool.close()
                pool.join()
                print("Pool closed and joined for EXIF extraction")

    return results
|
|
569
590
|
|
|
@@ -571,23 +592,23 @@ def _populate_exif_for_images(image_base,images,options=None):
|
|
|
571
592
|
def _write_exif_results(results,output_file):
|
|
572
593
|
"""
|
|
573
594
|
Write EXIF information to [output_file].
|
|
574
|
-
|
|
595
|
+
|
|
575
596
|
'results' is a list of dicts with fields 'exif_tags' and 'file_name'.
|
|
576
597
|
|
|
577
|
-
Writes to .csv or .json depending on the extension of 'output_file'.
|
|
598
|
+
Writes to .csv or .json depending on the extension of 'output_file'.
|
|
578
599
|
"""
|
|
579
|
-
|
|
600
|
+
|
|
580
601
|
if output_file.endswith('.json'):
|
|
581
|
-
|
|
602
|
+
|
|
582
603
|
with open(output_file,'w') as f:
|
|
583
604
|
json.dump(results,f,indent=1,default=str)
|
|
584
|
-
|
|
605
|
+
|
|
585
606
|
elif output_file.endswith('.csv'):
|
|
586
|
-
|
|
607
|
+
|
|
587
608
|
# Find all EXIF tags that exist in any image
|
|
588
609
|
all_keys = set()
|
|
589
610
|
for im in results:
|
|
590
|
-
|
|
611
|
+
|
|
591
612
|
keys_this_image = set()
|
|
592
613
|
exif_tags = im['exif_tags']
|
|
593
614
|
file_name = im['file_name']
|
|
@@ -597,60 +618,64 @@ def _write_exif_results(results,output_file):
|
|
|
597
618
|
'Error: tag {} appears twice in image {}'.format(
|
|
598
619
|
tag_name,file_name)
|
|
599
620
|
all_keys.add(tag_name)
|
|
600
|
-
|
|
621
|
+
|
|
601
622
|
# ...for each tag in this image
|
|
602
|
-
|
|
623
|
+
|
|
603
624
|
# ...for each image
|
|
604
|
-
|
|
625
|
+
|
|
605
626
|
all_keys = sorted(list(all_keys))
|
|
606
|
-
|
|
627
|
+
|
|
607
628
|
header = ['File Name']
|
|
608
629
|
header.extend(all_keys)
|
|
609
|
-
|
|
630
|
+
|
|
610
631
|
import csv
|
|
611
632
|
with open(output_file,'w') as csvfile:
|
|
612
|
-
|
|
633
|
+
|
|
613
634
|
writer = csv.writer(csvfile)
|
|
614
|
-
|
|
635
|
+
|
|
615
636
|
# Write header
|
|
616
637
|
writer.writerow(header)
|
|
617
|
-
|
|
638
|
+
|
|
618
639
|
for im in results:
|
|
619
|
-
|
|
640
|
+
|
|
620
641
|
row = [im['file_name']]
|
|
621
642
|
kvp_this_image = {tag[1]:tag[2] for tag in im['exif_tags']}
|
|
622
|
-
|
|
643
|
+
|
|
623
644
|
for i_key,key in enumerate(all_keys):
|
|
624
645
|
value = ''
|
|
625
646
|
if key in kvp_this_image:
|
|
626
647
|
value = kvp_this_image[key]
|
|
627
|
-
row.append(value)
|
|
648
|
+
row.append(value)
|
|
628
649
|
# ...for each key that *might* be present in this image
|
|
629
|
-
|
|
650
|
+
|
|
630
651
|
assert len(row) == len(header)
|
|
631
|
-
|
|
652
|
+
|
|
632
653
|
writer.writerow(row)
|
|
633
|
-
|
|
654
|
+
|
|
634
655
|
# ...for each image
|
|
635
|
-
|
|
656
|
+
|
|
636
657
|
# ...with open()
|
|
637
|
-
|
|
658
|
+
|
|
638
659
|
else:
|
|
639
|
-
|
|
660
|
+
|
|
640
661
|
raise ValueError('Could not determine output type from file {}'.format(
|
|
641
662
|
output_file))
|
|
642
|
-
|
|
663
|
+
|
|
643
664
|
# ...if we're writing to .json/.csv
|
|
644
|
-
|
|
665
|
+
|
|
645
666
|
print('Wrote results to {}'.format(output_file))
|
|
646
667
|
|
|
647
668
|
# ..._write_exif_results(...)
|
|
648
669
|
|
|
649
670
|
|
|
650
|
-
def read_exif_from_folder(input_folder,
|
|
671
|
+
def read_exif_from_folder(input_folder,
|
|
672
|
+
output_file=None,
|
|
673
|
+
options=None,
|
|
674
|
+
filenames=None,
|
|
675
|
+
recursive=True):
|
|
651
676
|
"""
|
|
652
677
|
Read EXIF data for a folder of images.
|
|
653
|
-
|
|
678
|
+
|
|
654
679
|
Args:
|
|
655
680
|
input_folder (str): folder to process; if this is None, [filenames] should be a list of absolute
|
|
656
681
|
paths
|
|
@@ -661,23 +686,22 @@ def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=N
|
|
|
661
686
|
a list of absolute filenames (if [input_folder] is None)
|
|
662
687
|
recursive (bool, optional): whether to recurse into [input_folder], not relevant if [input_folder]
|
|
663
688
|
is None.
|
|
664
|
-
|
|
665
|
-
|
|
689
|
+
|
|
666
690
|
Returns:
|
|
667
691
|
list: list of dicts, each of which contains EXIF information for one images. Fields include at least:
|
|
668
692
|
* 'file_name': the relative path to the image
|
|
669
693
|
* 'exif_tags': a dict of EXIF tags whose exact format depends on [options.processing_library].
|
|
670
694
|
"""
|
|
671
|
-
|
|
695
|
+
|
|
672
696
|
if options is None:
|
|
673
697
|
options = ReadExifOptions()
|
|
674
|
-
|
|
698
|
+
|
|
675
699
|
# Validate options
|
|
676
700
|
if options.tags_to_include is not None:
|
|
677
701
|
assert options.tags_to_exclude is None, "tags_to_include and tags_to_exclude are incompatible"
|
|
678
702
|
if options.tags_to_exclude is not None:
|
|
679
|
-
assert options.tags_to_include is None, "tags_to_include and tags_to_exclude are incompatible"
|
|
680
|
-
|
|
703
|
+
assert options.tags_to_include is None, "tags_to_include and tags_to_exclude are incompatible"
|
|
704
|
+
|
|
681
705
|
if input_folder is None:
|
|
682
706
|
input_folder = ''
|
|
683
707
|
if len(input_folder) > 0:
|
|
@@ -686,20 +710,20 @@ def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=N
|
|
|
686
710
|
|
|
687
711
|
assert (len(input_folder) > 0) or (filenames is not None), \
|
|
688
712
|
'Must specify either a folder or a list of files'
|
|
689
|
-
|
|
690
|
-
if output_file is not None:
|
|
691
|
-
|
|
713
|
+
|
|
714
|
+
if output_file is not None:
|
|
715
|
+
|
|
692
716
|
assert output_file.lower().endswith('.json') or output_file.lower().endswith('.csv'), \
|
|
693
717
|
'I only know how to write results to .json or .csv'
|
|
694
|
-
|
|
718
|
+
|
|
695
719
|
try:
|
|
696
720
|
with open(output_file, 'a') as f:
|
|
697
721
|
if not f.writable():
|
|
698
|
-
raise
|
|
722
|
+
raise OSError('File not writable')
|
|
699
723
|
except Exception:
|
|
700
724
|
print('Could not write to file {}'.format(output_file))
|
|
701
725
|
raise
|
|
702
|
-
|
|
726
|
+
|
|
703
727
|
if options.processing_library == 'exif':
|
|
704
728
|
assert is_executable(options.exiftool_command_name), 'exiftool not available'
|
|
705
729
|
|
|
@@ -708,9 +732,9 @@ def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=N
|
|
|
708
732
|
else:
|
|
709
733
|
assert isinstance(filenames,list)
|
|
710
734
|
images = _create_image_objects(filenames)
|
|
711
|
-
|
|
735
|
+
|
|
712
736
|
results = _populate_exif_for_images(input_folder,images,options)
|
|
713
|
-
|
|
737
|
+
|
|
714
738
|
if output_file is not None:
|
|
715
739
|
try:
|
|
716
740
|
_write_exif_results(results,output_file)
|
|
@@ -718,8 +742,8 @@ def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=N
|
|
|
718
742
|
if not options.allow_write_error:
|
|
719
743
|
raise
|
|
720
744
|
else:
|
|
721
|
-
print('Warning: error serializing EXIF data: {}'.format(str(e)))
|
|
722
|
-
|
|
745
|
+
print('Warning: error serializing EXIF data: {}'.format(str(e)))
|
|
746
|
+
|
|
723
747
|
return results
|
|
724
748
|
|
|
725
749
|
# ...read_exif_from_folder(...)
|
|
@@ -728,54 +752,54 @@ def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=N
|
|
|
728
752
|
def exif_results_to_cct(exif_results,cct_output_file=None,options=None):
|
|
729
753
|
"""
|
|
730
754
|
Given the EXIF results for a folder of images read via read_exif_from_folder,
|
|
731
|
-
create a COCO Camera Traps .json file that has no annotations, but
|
|
755
|
+
create a COCO Camera Traps .json file that has no annotations, but
|
|
732
756
|
attaches image filenames to locations and datetimes.
|
|
733
|
-
|
|
757
|
+
|
|
734
758
|
Args:
|
|
735
759
|
exif_results (str or list): the filename (or loaded list) containing the results
|
|
736
760
|
from read_exif_from_folder
|
|
737
|
-
cct_output_file (str,optional): the filename to which we should write
|
|
761
|
+
cct_output_file (str, optional): the filename to which we should write
|
|
738
762
|
COCO-Camera-Traps-formatted data
|
|
739
763
|
options (ExifResultsToCCTOptions, optional): options guiding the generation
|
|
740
764
|
of the CCT file, particularly location mapping
|
|
741
|
-
|
|
765
|
+
|
|
742
766
|
Returns:
|
|
743
767
|
dict: a COCO Camera Traps dict (with no annotations).
|
|
744
768
|
"""
|
|
745
|
-
|
|
769
|
+
|
|
746
770
|
if options is None:
|
|
747
771
|
options = ExifResultsToCCTOptions()
|
|
748
|
-
|
|
772
|
+
|
|
749
773
|
if isinstance(exif_results,str):
|
|
750
774
|
print('Reading EXIF results from {}'.format(exif_results))
|
|
751
775
|
with open(exif_results,'r') as f:
|
|
752
776
|
exif_results = json.load(f)
|
|
753
777
|
else:
|
|
754
778
|
assert isinstance(exif_results,list)
|
|
755
|
-
|
|
779
|
+
|
|
756
780
|
now = datetime.now()
|
|
757
781
|
|
|
758
782
|
image_info = []
|
|
759
783
|
|
|
760
784
|
images_without_datetime = []
|
|
761
785
|
images_with_invalid_datetime = []
|
|
762
|
-
|
|
786
|
+
|
|
763
787
|
# exif_result = exif_results[0]
|
|
764
788
|
for exif_result in tqdm(exif_results):
|
|
765
|
-
|
|
789
|
+
|
|
766
790
|
im = {}
|
|
767
|
-
|
|
791
|
+
|
|
768
792
|
# By default we assume that each leaf-node folder is a location
|
|
769
793
|
if options.filename_to_location_function is None:
|
|
770
794
|
im['location'] = 'unknown'
|
|
771
795
|
else:
|
|
772
|
-
im['location'] = options.filename_to_location_function(exif_result['file_name'])
|
|
773
|
-
|
|
796
|
+
im['location'] = options.filename_to_location_function(exif_result['file_name'])
|
|
797
|
+
|
|
774
798
|
im['file_name'] = exif_result['file_name']
|
|
775
799
|
im['id'] = im['file_name']
|
|
776
|
-
|
|
800
|
+
|
|
777
801
|
if ('exif_tags' not in exif_result) or (exif_result['exif_tags'] is None) or \
|
|
778
|
-
(options.exif_datetime_tag not in exif_result['exif_tags']):
|
|
802
|
+
(options.exif_datetime_tag not in exif_result['exif_tags']):
|
|
779
803
|
exif_dt = None
|
|
780
804
|
else:
|
|
781
805
|
exif_dt = exif_result['exif_tags'][options.exif_datetime_tag]
|
|
@@ -785,26 +809,26 @@ def exif_results_to_cct(exif_results,cct_output_file=None,options=None):
|
|
|
785
809
|
images_without_datetime.append(im['file_name'])
|
|
786
810
|
else:
|
|
787
811
|
dt = exif_dt
|
|
788
|
-
|
|
812
|
+
|
|
789
813
|
# An image from the future (or within the last 24 hours) is invalid
|
|
790
814
|
if (now - dt).total_seconds() <= 1*24*60*60:
|
|
791
815
|
print('Warning: datetime for {} is {}'.format(
|
|
792
816
|
im['file_name'],dt))
|
|
793
|
-
im['datetime'] = None
|
|
817
|
+
im['datetime'] = None
|
|
794
818
|
images_with_invalid_datetime.append(im['file_name'])
|
|
795
|
-
|
|
819
|
+
|
|
796
820
|
# An image from before the dawn of time is also invalid
|
|
797
821
|
elif dt.year < options.min_valid_timestamp_year:
|
|
798
822
|
print('Warning: datetime for {} is {}'.format(
|
|
799
823
|
im['file_name'],dt))
|
|
800
824
|
im['datetime'] = None
|
|
801
825
|
images_with_invalid_datetime.append(im['file_name'])
|
|
802
|
-
|
|
826
|
+
|
|
803
827
|
else:
|
|
804
828
|
im['datetime'] = dt
|
|
805
829
|
|
|
806
830
|
image_info.append(im)
|
|
807
|
-
|
|
831
|
+
|
|
808
832
|
# ...for each exif image result
|
|
809
833
|
|
|
810
834
|
print('Parsed EXIF datetime information, unable to parse EXIF date from {} of {} images'.format(
|
|
@@ -815,21 +839,21 @@ def exif_results_to_cct(exif_results,cct_output_file=None,options=None):
|
|
|
815
839
|
d['images'] = image_info
|
|
816
840
|
d['annotations'] = []
|
|
817
841
|
d['categories'] = []
|
|
818
|
-
|
|
842
|
+
|
|
819
843
|
if cct_output_file is not None:
|
|
820
844
|
write_object_with_serialized_datetimes(d,cct_output_file)
|
|
821
|
-
|
|
845
|
+
|
|
822
846
|
return d
|
|
823
847
|
|
|
824
848
|
# ...exif_results_to_cct(...)
|
|
825
849
|
|
|
826
|
-
|
|
850
|
+
|
|
827
851
|
#%% Interactive driver
|
|
828
852
|
|
|
829
853
|
if False:
|
|
830
|
-
|
|
854
|
+
|
|
831
855
|
#%%
|
|
832
|
-
|
|
856
|
+
|
|
833
857
|
input_folder = r'C:\temp\md-name-testing'
|
|
834
858
|
output_file = None # r'C:\temp\md-name-testing\exif.json'
|
|
835
859
|
options = ReadExifOptions()
|
|
@@ -838,30 +862,28 @@ if False:
|
|
|
838
862
|
options.use_threads = False
|
|
839
863
|
options.processing_library = 'pil'
|
|
840
864
|
# options.processing_library = 'exiftool'
|
|
841
|
-
options.tags_to_include = ['DateTime','Model','Make','ExifImageWidth',
|
|
865
|
+
options.tags_to_include = ['DateTime','Model','Make','ExifImageWidth',
|
|
866
|
+
'ExifImageHeight','DateTime','DateTimeOriginal','Orientation']
|
|
842
867
|
# options.tags_to_exclude = ['MakerNote']
|
|
843
|
-
|
|
868
|
+
|
|
844
869
|
results = read_exif_from_folder(input_folder,output_file,options)
|
|
845
870
|
|
|
846
871
|
#%%
|
|
847
|
-
|
|
872
|
+
|
|
848
873
|
with open(output_file,'r') as f:
|
|
849
874
|
d = json.load(f)
|
|
850
|
-
|
|
851
875
|
|
|
852
|
-
#%% Command-line driver
|
|
853
876
|
|
|
854
|
-
|
|
855
|
-
import sys
|
|
877
|
+
#%% Command-line driver
|
|
856
878
|
|
|
857
|
-
def main():
|
|
879
|
+
def main(): # noqa
|
|
858
880
|
|
|
859
881
|
options = ReadExifOptions()
|
|
860
|
-
|
|
882
|
+
|
|
861
883
|
parser = argparse.ArgumentParser(description=('Read EXIF information from all images in' + \
|
|
862
884
|
' a folder, and write the results to .csv or .json'))
|
|
863
885
|
|
|
864
|
-
parser.add_argument('input_folder', type=str,
|
|
886
|
+
parser.add_argument('input_folder', type=str,
|
|
865
887
|
help='Folder of images from which we should read EXIF information')
|
|
866
888
|
parser.add_argument('output_file', type=str,
|
|
867
889
|
help='Output file (.json) to which we should write EXIF information')
|
|
@@ -871,16 +893,16 @@ def main():
|
|
|
871
893
|
help='Use threads (instead of processes) for multitasking')
|
|
872
894
|
parser.add_argument('--processing_library', type=str, default=options.processing_library,
|
|
873
895
|
help='Processing library (exif or pil)')
|
|
874
|
-
|
|
896
|
+
|
|
875
897
|
if len(sys.argv[1:]) == 0:
|
|
876
898
|
parser.print_help()
|
|
877
899
|
parser.exit()
|
|
878
900
|
|
|
879
|
-
args = parser.parse_args()
|
|
901
|
+
args = parser.parse_args()
|
|
880
902
|
args_to_object(args, options)
|
|
881
903
|
options.processing_library = options.processing_library.lower()
|
|
882
|
-
|
|
904
|
+
|
|
883
905
|
read_exif_from_folder(args.input_folder,args.output_file,options)
|
|
884
|
-
|
|
906
|
+
|
|
885
907
|
if __name__ == '__main__':
|
|
886
908
|
main()
|