megadetector 5.0.7__py3-none-any.whl → 5.0.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of megadetector might be problematic. Click here for more details.
- api/__init__.py +0 -0
- api/batch_processing/__init__.py +0 -0
- api/batch_processing/api_core/__init__.py +0 -0
- api/batch_processing/api_core/batch_service/__init__.py +0 -0
- api/batch_processing/api_core/batch_service/score.py +0 -1
- api/batch_processing/api_core/server_job_status_table.py +0 -1
- api/batch_processing/api_core_support/__init__.py +0 -0
- api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
- api/batch_processing/api_support/__init__.py +0 -0
- api/batch_processing/api_support/summarize_daily_activity.py +0 -1
- api/batch_processing/data_preparation/__init__.py +0 -0
- api/batch_processing/data_preparation/manage_local_batch.py +93 -79
- api/batch_processing/data_preparation/manage_video_batch.py +8 -8
- api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
- api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
- api/batch_processing/postprocessing/__init__.py +0 -0
- api/batch_processing/postprocessing/add_max_conf.py +12 -12
- api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
- api/batch_processing/postprocessing/combine_api_outputs.py +69 -55
- api/batch_processing/postprocessing/compare_batch_results.py +114 -44
- api/batch_processing/postprocessing/convert_output_format.py +62 -19
- api/batch_processing/postprocessing/load_api_results.py +17 -20
- api/batch_processing/postprocessing/md_to_coco.py +31 -21
- api/batch_processing/postprocessing/md_to_labelme.py +165 -68
- api/batch_processing/postprocessing/merge_detections.py +40 -15
- api/batch_processing/postprocessing/postprocess_batch_results.py +270 -186
- api/batch_processing/postprocessing/remap_detection_categories.py +170 -0
- api/batch_processing/postprocessing/render_detection_confusion_matrix.py +75 -39
- api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
- api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
- api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +244 -160
- api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
- api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
- api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
- api/synchronous/__init__.py +0 -0
- api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
- api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
- api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
- api/synchronous/api_core/animal_detection_api/config.py +35 -35
- api/synchronous/api_core/tests/__init__.py +0 -0
- api/synchronous/api_core/tests/load_test.py +109 -109
- classification/__init__.py +0 -0
- classification/aggregate_classifier_probs.py +21 -24
- classification/analyze_failed_images.py +11 -13
- classification/cache_batchapi_outputs.py +51 -51
- classification/create_classification_dataset.py +69 -68
- classification/crop_detections.py +54 -53
- classification/csv_to_json.py +97 -100
- classification/detect_and_crop.py +105 -105
- classification/evaluate_model.py +43 -42
- classification/identify_mislabeled_candidates.py +47 -46
- classification/json_to_azcopy_list.py +10 -10
- classification/json_validator.py +72 -71
- classification/map_classification_categories.py +44 -43
- classification/merge_classification_detection_output.py +68 -68
- classification/prepare_classification_script.py +157 -154
- classification/prepare_classification_script_mc.py +228 -228
- classification/run_classifier.py +27 -26
- classification/save_mislabeled.py +30 -30
- classification/train_classifier.py +20 -20
- classification/train_classifier_tf.py +21 -22
- classification/train_utils.py +10 -10
- data_management/__init__.py +0 -0
- data_management/annotations/__init__.py +0 -0
- data_management/annotations/annotation_constants.py +18 -31
- data_management/camtrap_dp_to_coco.py +238 -0
- data_management/cct_json_utils.py +107 -59
- data_management/cct_to_md.py +176 -158
- data_management/cct_to_wi.py +247 -219
- data_management/coco_to_labelme.py +272 -0
- data_management/coco_to_yolo.py +86 -62
- data_management/databases/__init__.py +0 -0
- data_management/databases/add_width_and_height_to_db.py +20 -16
- data_management/databases/combine_coco_camera_traps_files.py +35 -31
- data_management/databases/integrity_check_json_db.py +130 -83
- data_management/databases/subset_json_db.py +25 -16
- data_management/generate_crops_from_cct.py +27 -45
- data_management/get_image_sizes.py +188 -144
- data_management/importers/add_nacti_sizes.py +8 -8
- data_management/importers/add_timestamps_to_icct.py +78 -78
- data_management/importers/animl_results_to_md_results.py +158 -160
- data_management/importers/auckland_doc_test_to_json.py +9 -9
- data_management/importers/auckland_doc_to_json.py +8 -8
- data_management/importers/awc_to_json.py +7 -7
- data_management/importers/bellevue_to_json.py +15 -15
- data_management/importers/cacophony-thermal-importer.py +13 -13
- data_management/importers/carrizo_shrubfree_2018.py +8 -8
- data_management/importers/carrizo_trail_cam_2017.py +8 -8
- data_management/importers/cct_field_adjustments.py +9 -9
- data_management/importers/channel_islands_to_cct.py +10 -10
- data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
- data_management/importers/ena24_to_json.py +7 -7
- data_management/importers/filenames_to_json.py +8 -8
- data_management/importers/helena_to_cct.py +7 -7
- data_management/importers/idaho-camera-traps.py +7 -7
- data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
- data_management/importers/jb_csv_to_json.py +9 -9
- data_management/importers/mcgill_to_json.py +8 -8
- data_management/importers/missouri_to_json.py +18 -18
- data_management/importers/nacti_fieldname_adjustments.py +10 -10
- data_management/importers/noaa_seals_2019.py +8 -8
- data_management/importers/pc_to_json.py +7 -7
- data_management/importers/plot_wni_giraffes.py +7 -7
- data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
- data_management/importers/prepare_zsl_imerit.py +7 -7
- data_management/importers/rspb_to_json.py +8 -8
- data_management/importers/save_the_elephants_survey_A.py +8 -8
- data_management/importers/save_the_elephants_survey_B.py +9 -9
- data_management/importers/snapshot_safari_importer.py +26 -26
- data_management/importers/snapshot_safari_importer_reprise.py +665 -665
- data_management/importers/snapshot_serengeti_lila.py +14 -14
- data_management/importers/sulross_get_exif.py +8 -9
- data_management/importers/timelapse_csv_set_to_json.py +11 -11
- data_management/importers/ubc_to_json.py +13 -13
- data_management/importers/umn_to_json.py +7 -7
- data_management/importers/wellington_to_json.py +8 -8
- data_management/importers/wi_to_json.py +9 -9
- data_management/importers/zamba_results_to_md_results.py +181 -181
- data_management/labelme_to_coco.py +309 -159
- data_management/labelme_to_yolo.py +103 -60
- data_management/lila/__init__.py +0 -0
- data_management/lila/add_locations_to_island_camera_traps.py +9 -9
- data_management/lila/add_locations_to_nacti.py +147 -147
- data_management/lila/create_lila_blank_set.py +114 -31
- data_management/lila/create_lila_test_set.py +8 -8
- data_management/lila/create_links_to_md_results_files.py +106 -106
- data_management/lila/download_lila_subset.py +92 -90
- data_management/lila/generate_lila_per_image_labels.py +56 -43
- data_management/lila/get_lila_annotation_counts.py +18 -15
- data_management/lila/get_lila_image_counts.py +11 -11
- data_management/lila/lila_common.py +103 -70
- data_management/lila/test_lila_metadata_urls.py +132 -116
- data_management/ocr_tools.py +173 -128
- data_management/read_exif.py +161 -99
- data_management/remap_coco_categories.py +84 -0
- data_management/remove_exif.py +58 -62
- data_management/resize_coco_dataset.py +32 -44
- data_management/wi_download_csv_to_coco.py +246 -0
- data_management/yolo_output_to_md_output.py +86 -73
- data_management/yolo_to_coco.py +535 -95
- detection/__init__.py +0 -0
- detection/detector_training/__init__.py +0 -0
- detection/process_video.py +85 -33
- detection/pytorch_detector.py +43 -25
- detection/run_detector.py +157 -72
- detection/run_detector_batch.py +189 -114
- detection/run_inference_with_yolov5_val.py +118 -51
- detection/run_tiled_inference.py +113 -42
- detection/tf_detector.py +51 -28
- detection/video_utils.py +606 -521
- docs/source/conf.py +43 -0
- md_utils/__init__.py +0 -0
- md_utils/azure_utils.py +9 -9
- md_utils/ct_utils.py +249 -70
- md_utils/directory_listing.py +59 -64
- md_utils/md_tests.py +968 -862
- md_utils/path_utils.py +655 -155
- md_utils/process_utils.py +157 -133
- md_utils/sas_blob_utils.py +20 -20
- md_utils/split_locations_into_train_val.py +45 -32
- md_utils/string_utils.py +33 -10
- md_utils/url_utils.py +208 -27
- md_utils/write_html_image_list.py +51 -35
- md_visualization/__init__.py +0 -0
- md_visualization/plot_utils.py +102 -109
- md_visualization/render_images_with_thumbnails.py +34 -34
- md_visualization/visualization_utils.py +908 -311
- md_visualization/visualize_db.py +109 -58
- md_visualization/visualize_detector_output.py +61 -42
- {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/METADATA +21 -17
- megadetector-5.0.9.dist-info/RECORD +224 -0
- {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/WHEEL +1 -1
- {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
- taxonomy_mapping/__init__.py +0 -0
- taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
- taxonomy_mapping/map_new_lila_datasets.py +154 -154
- taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
- taxonomy_mapping/preview_lila_taxonomy.py +591 -591
- taxonomy_mapping/retrieve_sample_image.py +12 -12
- taxonomy_mapping/simple_image_download.py +11 -11
- taxonomy_mapping/species_lookup.py +10 -10
- taxonomy_mapping/taxonomy_csv_checker.py +18 -18
- taxonomy_mapping/taxonomy_graph.py +47 -47
- taxonomy_mapping/validate_lila_category_mappings.py +83 -76
- data_management/cct_json_to_filename_json.py +0 -89
- data_management/cct_to_csv.py +0 -140
- data_management/databases/remove_corrupted_images_from_db.py +0 -191
- detection/detector_training/copy_checkpoints.py +0 -43
- md_visualization/visualize_megadb.py +0 -183
- megadetector-5.0.7.dist-info/RECORD +0 -202
- {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0
data_management/read_exif.py
CHANGED
|
@@ -1,15 +1,15 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
1
|
+
"""
|
|
2
|
+
|
|
3
|
+
read_exif.py
|
|
4
|
+
|
|
5
|
+
Given a folder of images, reads relevant metadata (EXIF/IPTC/XMP) fields from all images,
|
|
6
|
+
and writes them to a .json or .csv file.
|
|
7
|
+
|
|
8
|
+
This module can use either PIL (which can only reliably read EXIF data) or exiftool (which
|
|
9
|
+
can read everything). The latter approach expects that exiftool is available on the system
|
|
10
|
+
path. No attempt is made to be consistent in format across the two approaches.
|
|
11
|
+
|
|
12
|
+
"""
|
|
13
13
|
|
|
14
14
|
#%% Imports and constants
|
|
15
15
|
|
|
@@ -24,7 +24,7 @@ from multiprocessing.pool import Pool as Pool
|
|
|
24
24
|
from tqdm import tqdm
|
|
25
25
|
from PIL import Image, ExifTags
|
|
26
26
|
|
|
27
|
-
from md_utils.path_utils import find_images
|
|
27
|
+
from md_utils.path_utils import find_images, is_executable
|
|
28
28
|
from md_utils.ct_utils import args_to_object
|
|
29
29
|
|
|
30
30
|
debug_max_images = None
|
|
@@ -33,54 +33,61 @@ debug_max_images = None
|
|
|
33
33
|
#%% Options
|
|
34
34
|
|
|
35
35
|
class ReadExifOptions:
    """
    Parameters controlling metadata extraction.

    Defaults are declared as class attributes, so they remain readable as
    ReadExifOptions.<name>. The one mutable default (tag_types_to_ignore) is
    copied per instance in __init__, so editing it on one options object never
    changes another options object or the class-level default.
    """

    #: Enable additional debug console output
    verbose = False

    #: If this is True and an output file is specified for read_exif_from_folder,
    #: and we encounter a serialization issue, we'll return the results but won't
    #: error.
    allow_write_error = False

    #: Number of concurrent workers, set to <= 1 to disable parallelization
    n_workers = 1

    #: Should we use threads (vs. processes) for parallelization?
    #:
    #: Not relevant if n_workers is <= 1.
    use_threads = True

    #: "File" and "ExifTool" are tag types used by ExifTool to report data that
    #: doesn't come from EXIF, rather from the file (e.g. file size).
    tag_types_to_ignore = {'File','ExifTool'}

    #: Include/exclude specific tags (tags_to_include and tags_to_exclude are mutually incompatible)
    #:
    #: A useful set of tags one might want to limit queries for:
    #:
    #: options.tags_to_include = ['DateTime','Model','Make','ExifImageWidth','ExifImageHeight',
    #:    'DateTimeOriginal','Orientation']
    tags_to_include = None

    #: Include/exclude specific tags (tags_to_include and tags_to_exclude are mutually incompatible)
    tags_to_exclude = None

    #: The command line to invoke if using exiftool, can be an absolute path to exiftool.exe, or
    #: can be just "exiftool", in which case it should be on your system path.
    exiftool_command_name = 'exiftool'

    #: How should we handle byte-formatted EXIF tags?
    #:
    #: 'convert_to_string': convert to a Python string
    #: 'delete': don't include at all
    #: 'raw': include as a byte string
    byte_handling = 'convert_to_string' # 'convert_to_string','delete','raw'

    #: Should we use exiftool or PIL?
    processing_library = 'pil' # 'exiftool','pil'

    def __init__(self):
        # Give every instance its own copy of the mutable default; without this,
        # options.tag_types_to_ignore.add(...) would silently edit the set shared
        # by all other instances.
        self.tag_types_to_ignore = set(type(self).tag_types_to_ignore)
|
|
65
|
-
|
|
84
|
+
|
|
66
85
|
|
|
67
86
|
#%% Functions
|
|
68
87
|
|
|
69
|
-
def
|
|
88
|
+
def _get_exif_ifd(exif):
|
|
70
89
|
"""
|
|
71
|
-
|
|
72
|
-
"""
|
|
73
|
-
|
|
74
|
-
image_files = find_images(input_folder,recursive=True)
|
|
75
|
-
image_files = [os.path.relpath(s,input_folder) for s in image_files]
|
|
76
|
-
image_files = [s.replace('\\','/') for s in image_files]
|
|
77
|
-
print('Enumerated {} files'.format(len(image_files)))
|
|
78
|
-
return image_files
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
def get_exif_ifd(exif):
|
|
82
|
-
"""
|
|
83
|
-
Read EXIF data by finding the EXIF offset and reading tags directly
|
|
90
|
+
Read EXIF data by finding the EXIF offset and reading tags directly
|
|
84
91
|
|
|
85
92
|
https://github.com/python-pillow/Pillow/issues/5863
|
|
86
93
|
"""
|
|
@@ -98,8 +105,16 @@ def get_exif_ifd(exif):
|
|
|
98
105
|
|
|
99
106
|
def read_pil_exif(im,options=None):
|
|
100
107
|
"""
|
|
101
|
-
Read all the EXIF data we know how to read from
|
|
102
|
-
|
|
108
|
+
Read all the EXIF data we know how to read from an image, using PIL. This is primarily
|
|
109
|
+
an internal function; the main entry point for single-image EXIF information is
|
|
110
|
+
read_exif_tags_for_image().
|
|
111
|
+
|
|
112
|
+
Args:
|
|
113
|
+
im (str or PIL.Image.Image): image (as a filename or an Image object) from which
|
|
114
|
+
we should read EXIF data.
|
|
115
|
+
|
|
116
|
+
Returns:
|
|
117
|
+
dict: a dictionary mapping EXIF tag names to their values
|
|
103
118
|
"""
|
|
104
119
|
|
|
105
120
|
if options is None:
|
|
@@ -128,10 +143,10 @@ def read_pil_exif(im,options=None):
|
|
|
128
143
|
# print('Warning: unrecognized EXIF tag: {}'.format(k))
|
|
129
144
|
exif_tags[k] = str(v)
|
|
130
145
|
|
|
131
|
-
|
|
146
|
+
exif_ifd_tags = _get_exif_ifd(exif_info)
|
|
132
147
|
|
|
133
|
-
for k in
|
|
134
|
-
v =
|
|
148
|
+
for k in exif_ifd_tags.keys():
|
|
149
|
+
v = exif_ifd_tags[k]
|
|
135
150
|
if k in exif_tags:
|
|
136
151
|
if options.verbose:
|
|
137
152
|
print('Warning: redundant EXIF values for {} in {}:\n{}\n{}'.format(
|
|
@@ -167,8 +182,8 @@ def read_pil_exif(im,options=None):
|
|
|
167
182
|
|
|
168
183
|
def format_datetime_as_exif_datetime_string(dt):
|
|
169
184
|
"""
|
|
170
|
-
Returns a Python datetime object rendered using the standard
|
|
171
|
-
string format
|
|
185
|
+
Returns a Python datetime object rendered using the standard EXIF datetime
|
|
186
|
+
string format ('%Y:%m:%d %H:%M:%S')
|
|
172
187
|
"""
|
|
173
188
|
|
|
174
189
|
return datetime.strftime(dt, '%Y:%m:%d %H:%M:%S')
|
|
@@ -180,7 +195,14 @@ def parse_exif_datetime_string(s,verbose=False):
|
|
|
180
195
|
|
|
181
196
|
%Y:%m:%d %H:%M:%S
|
|
182
197
|
|
|
183
|
-
|
|
198
|
+
Parses one of those strings into a Python datetime object.
|
|
199
|
+
|
|
200
|
+
Args:
|
|
201
|
+
s (str): datetime string to parse, should be in standard EXIF datetime format
|
|
202
|
+
verbose (bool, optional): enable additional debug output
|
|
203
|
+
|
|
204
|
+
Returns:
|
|
205
|
+
datetime: the datetime object created from [s]
|
|
184
206
|
"""
|
|
185
207
|
|
|
186
208
|
dt = None
|
|
@@ -192,17 +214,43 @@ def parse_exif_datetime_string(s,verbose=False):
|
|
|
192
214
|
return dt
|
|
193
215
|
|
|
194
216
|
|
|
217
|
+
def _filter_tags(tags,options):
    """
    Internal function used to include/exclude specific tags from the exif_tags
    dict.

    Args:
        tags (dict): dict mapping tag names to tag values
        options: object exposing [tags_to_include] and [tags_to_exclude] (each either
            None or a collection of tag names); can be None, in which case no
            filtering is performed

    Returns:
        dict: [tags] itself when no filtering is requested, otherwise a new dict
        containing only the tags that pass the include/exclude filter
    """

    if options is None:
        return tags

    tags_to_include = options.tags_to_include
    tags_to_exclude = options.tags_to_exclude

    if tags_to_include is None and tags_to_exclude is None:
        return tags

    # At this point at least one filter is set; setting both is a caller error
    assert tags_to_include is None or tags_to_exclude is None, \
        "tags_to_include and tags_to_exclude are incompatible"

    if tags_to_include is not None:
        # Build the lookup set once, rather than scanning a list for every tag
        allowed_names = set(tags_to_include)
        return {name:value for name,value in tags.items() if name in allowed_names}

    blocked_names = set(tags_to_exclude)
    return {name:value for name,value in tags.items() if name not in blocked_names}
|
|
241
|
+
|
|
242
|
+
|
|
195
243
|
def read_exif_tags_for_image(file_path,options=None):
|
|
196
244
|
"""
|
|
197
245
|
Get relevant fields from EXIF data for an image
|
|
198
246
|
|
|
199
|
-
Returns
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
247
|
+
Returns:
|
|
248
|
+
dict: a dict with fields 'status' (str) and 'tags'. The exact format of 'tags' depends on
|
|
249
|
+
options.processing_library, where options (ReadExifOptions, optional) carries the parameters
|
|
250
|
+
controlling metadata extraction:
|
|
251
|
+
|
|
252
|
+
- For exiftool, 'tags' is a list of lists, where each element is (type/tag/value)
|
|
253
|
+
- For PIL, 'tags' is a dict (str:str)
|
|
206
254
|
"""
|
|
207
255
|
|
|
208
256
|
if options is None:
|
|
@@ -227,8 +275,8 @@ def read_exif_tags_for_image(file_path,options=None):
|
|
|
227
275
|
result['status'] = 'empty_read'
|
|
228
276
|
else:
|
|
229
277
|
result['status'] = 'success'
|
|
230
|
-
result['tags'] = exif_tags
|
|
231
|
-
|
|
278
|
+
result['tags'] = _filter_tags(exif_tags,options)
|
|
279
|
+
|
|
232
280
|
return result
|
|
233
281
|
|
|
234
282
|
elif options.processing_library == 'exiftool':
|
|
@@ -283,9 +331,12 @@ def read_exif_tags_for_image(file_path,options=None):
|
|
|
283
331
|
print('Ignoring tag with type {}'.format(field_type))
|
|
284
332
|
continue
|
|
285
333
|
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
334
|
+
field_name = field_name_type_tokens[1].strip()
|
|
335
|
+
if options.tags_to_exclude is not None and field_name in options.tags_to_exclude:
|
|
336
|
+
continue
|
|
337
|
+
if options.tags_to_include is not None and field_name not in options.tags_to_include:
|
|
338
|
+
continue
|
|
339
|
+
tag = [field_type,field_name,field_value]
|
|
289
340
|
|
|
290
341
|
exif_tags.append(tag)
|
|
291
342
|
|
|
@@ -305,7 +356,7 @@ def read_exif_tags_for_image(file_path,options=None):
|
|
|
305
356
|
# ...read_exif_tags_for_image()
|
|
306
357
|
|
|
307
358
|
|
|
308
|
-
def
|
|
359
|
+
def _populate_exif_data(im, image_base, options=None):
|
|
309
360
|
"""
|
|
310
361
|
Populate EXIF data into the 'exif_tags' field in the image object [im].
|
|
311
362
|
|
|
@@ -347,23 +398,28 @@ def populate_exif_data(im, image_base, options=None):
|
|
|
347
398
|
|
|
348
399
|
return im
|
|
349
400
|
|
|
350
|
-
# ...
|
|
401
|
+
# ..._populate_exif_data()
|
|
351
402
|
|
|
352
403
|
|
|
353
|
-
def
|
|
404
|
+
def _create_image_objects(image_files,recursive=True):
|
|
354
405
|
"""
|
|
355
406
|
Create empty image objects for every image in [image_files], which can be a
|
|
356
407
|
list of relative paths (which will get stored without processing, so the base
|
|
357
408
|
path doesn't matter here), or a folder name.
|
|
358
409
|
|
|
359
410
|
Returns a list of dicts with field 'file_name' (a relative path).
|
|
411
|
+
|
|
412
|
+
"recursive" is ignored if "image_files" is a list.
|
|
360
413
|
"""
|
|
361
414
|
|
|
362
415
|
# Enumerate *relative* paths
|
|
363
416
|
if isinstance(image_files,str):
|
|
364
417
|
print('Enumerating image files in {}'.format(image_files))
|
|
365
418
|
assert os.path.isdir(image_files), 'Invalid image folder {}'.format(image_files)
|
|
366
|
-
image_files =
|
|
419
|
+
image_files = find_images(image_files,
|
|
420
|
+
recursive=recursive,
|
|
421
|
+
return_relative_paths=True,
|
|
422
|
+
convert_slashes=True)
|
|
367
423
|
|
|
368
424
|
images = []
|
|
369
425
|
for fn in image_files:
|
|
@@ -378,7 +434,7 @@ def create_image_objects(image_files):
|
|
|
378
434
|
return images
|
|
379
435
|
|
|
380
436
|
|
|
381
|
-
def
|
|
437
|
+
def _populate_exif_for_images(image_base,images,options=None):
|
|
382
438
|
"""
|
|
383
439
|
Main worker loop: read EXIF data for each image object in [images] and
|
|
384
440
|
populate the image objects.
|
|
@@ -394,7 +450,7 @@ def populate_exif_for_images(image_base,images,options=None):
|
|
|
394
450
|
|
|
395
451
|
results = []
|
|
396
452
|
for im in tqdm(images):
|
|
397
|
-
results.append(
|
|
453
|
+
results.append(_populate_exif_data(im,image_base,options))
|
|
398
454
|
|
|
399
455
|
else:
|
|
400
456
|
|
|
@@ -406,13 +462,13 @@ def populate_exif_for_images(image_base,images,options=None):
|
|
|
406
462
|
print('Starting parallel process pool with {} workers'.format(options.n_workers))
|
|
407
463
|
pool = Pool(options.n_workers)
|
|
408
464
|
|
|
409
|
-
results = list(tqdm(pool.imap(partial(
|
|
465
|
+
results = list(tqdm(pool.imap(partial(_populate_exif_data,image_base=image_base,
|
|
410
466
|
options=options),images),total=len(images)))
|
|
411
467
|
|
|
412
468
|
return results
|
|
413
469
|
|
|
414
470
|
|
|
415
|
-
def
|
|
471
|
+
def _write_exif_results(results,output_file):
|
|
416
472
|
"""
|
|
417
473
|
Write EXIF information to [output_file].
|
|
418
474
|
|
|
@@ -489,33 +545,35 @@ def write_exif_results(results,output_file):
|
|
|
489
545
|
print('Wrote results to {}'.format(output_file))
|
|
490
546
|
|
|
491
547
|
|
|
492
|
-
def
|
|
493
|
-
|
|
494
|
-
"""Check whether `name` is on PATH and marked as executable."""
|
|
495
|
-
|
|
496
|
-
# https://stackoverflow.com/questions/11210104/check-if-a-program-exists-from-a-python-script
|
|
497
|
-
|
|
498
|
-
from shutil import which
|
|
499
|
-
return which(name) is not None
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=None):
|
|
548
|
+
def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=None,recursive=True):
|
|
503
549
|
"""
|
|
504
|
-
Read EXIF data for
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
550
|
+
Read EXIF data for a folder of images.
|
|
551
|
+
|
|
552
|
+
Args:
|
|
553
|
+
input_folder (str): folder to process; if this is None, [filenames] should be a list of absolute
|
|
554
|
+
paths
|
|
555
|
+
output_file (str, optional): .json file to which we should write results; if this is None, results
|
|
556
|
+
are returned but not written to disk
|
|
557
|
+
options (ReadExifOptions, optional): parameters controlling metadata extraction
|
|
558
|
+
filenames (list, optional): allowlist of relative filenames (if [input_folder] is not None) or
|
|
559
|
+
a list of absolute filenames (if [input_folder] is None)
|
|
560
|
+
recursive (bool, optional): whether to recurse into [input_folder], not relevant if [input_folder]
|
|
561
|
+
is None.
|
|
562
|
+
|
|
563
|
+
Returns:
|
|
564
|
+
dict: a dictionary mapping relative filenames to EXIF data, whose format depends on whether
|
|
565
|
+
we're using PIL or exiftool.
|
|
514
566
|
"""
|
|
515
567
|
|
|
516
568
|
if options is None:
|
|
517
569
|
options = ReadExifOptions()
|
|
518
570
|
|
|
571
|
+
# Validate options
|
|
572
|
+
if options.tags_to_include is not None:
|
|
573
|
+
assert options.tags_to_exclude is None, "tags_to_include and tags_to_exclude are incompatible"
|
|
574
|
+
if options.tags_to_exclude is not None:
|
|
575
|
+
assert options.tags_to_include is None, "tags_to_include and tags_to_exclude are incompatible"
|
|
576
|
+
|
|
519
577
|
if input_folder is None:
|
|
520
578
|
input_folder = ''
|
|
521
579
|
if len(input_folder) > 0:
|
|
@@ -542,16 +600,16 @@ def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=N
|
|
|
542
600
|
assert is_executable(options.exiftool_command_name), 'exiftool not available'
|
|
543
601
|
|
|
544
602
|
if filenames is None:
|
|
545
|
-
images =
|
|
603
|
+
images = _create_image_objects(input_folder,recursive=recursive)
|
|
546
604
|
else:
|
|
547
605
|
assert isinstance(filenames,list)
|
|
548
|
-
images =
|
|
606
|
+
images = _create_image_objects(filenames)
|
|
549
607
|
|
|
550
|
-
results =
|
|
608
|
+
results = _populate_exif_for_images(input_folder,images,options)
|
|
551
609
|
|
|
552
610
|
if output_file is not None:
|
|
553
611
|
try:
|
|
554
|
-
|
|
612
|
+
_write_exif_results(results,output_file)
|
|
555
613
|
except Exception as e:
|
|
556
614
|
if not options.allow_write_error:
|
|
557
615
|
raise
|
|
@@ -567,14 +625,16 @@ if False:
|
|
|
567
625
|
|
|
568
626
|
#%%
|
|
569
627
|
|
|
570
|
-
input_folder =
|
|
571
|
-
output_file =
|
|
628
|
+
input_folder = r'C:\temp\md-name-testing'
|
|
629
|
+
output_file = None # r'C:\temp\md-name-testing\exif.json'
|
|
572
630
|
options = ReadExifOptions()
|
|
573
631
|
options.verbose = False
|
|
574
632
|
options.n_workers = 10
|
|
575
633
|
options.use_threads = False
|
|
576
634
|
options.processing_library = 'pil'
|
|
577
635
|
# options.processing_library = 'exiftool'
|
|
636
|
+
options.tags_to_include = ['DateTime','Model','Make','ExifImageWidth','ExifImageHeight','DateTime','DateTimeOriginal','Orientation']
|
|
637
|
+
# options.tags_to_exclude = ['MakerNote']
|
|
578
638
|
|
|
579
639
|
results = read_exif_from_folder(input_folder,output_file,options)
|
|
580
640
|
|
|
@@ -596,8 +656,10 @@ def main():
|
|
|
596
656
|
parser = argparse.ArgumentParser(description=('Read EXIF information from all images in' + \
|
|
597
657
|
' a folder, and write the results to .csv or .json'))
|
|
598
658
|
|
|
599
|
-
parser.add_argument('input_folder', type=str
|
|
600
|
-
|
|
659
|
+
parser.add_argument('input_folder', type=str,
|
|
660
|
+
help='Folder of images from which we should read EXIF information')
|
|
661
|
+
parser.add_argument('output_file', type=str,
|
|
662
|
+
help='Output file (.json) to which we should write EXIF information')
|
|
601
663
|
parser.add_argument('--n_workers', type=int, default=1,
|
|
602
664
|
help='Number of concurrent workers to use (defaults to 1)')
|
|
603
665
|
parser.add_argument('--use_threads', action='store_true',
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
"""
|
|
2
|
+
|
|
3
|
+
remap_coco_categories.py
|
|
4
|
+
|
|
5
|
+
Given a COCO-formatted dataset, remap the categories to a new mapping.
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
#%% Imports and constants
|
|
10
|
+
|
|
11
|
+
import os
|
|
12
|
+
import json
|
|
13
|
+
|
|
14
|
+
from copy import deepcopy
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
#%% Main function
|
|
18
|
+
|
|
19
|
+
def remap_coco_categories(input_data,
                          output_category_name_to_id,
                          input_category_name_to_output_category_name,
                          output_file=None):
    """
    Given a COCO-formatted dataset, remap the categories to a new categories mapping, optionally
    writing the results to a new file.

    Args:
        input_data (str or dict): a COCO-formatted dict or the name of a .json file
            containing one. If it's a dict, it will be copied, not modified in place.
        output_category_name_to_id (dict): maps output category names (str) to output
            category IDs (int); this becomes the complete category list of the result
        input_category_name_to_output_category_name (dict): maps input category names (str)
            to output category names (str)
        output_file (str, optional): if not None, the remapped dataset is written to this
            file as JSON

    Returns:
        dict: the remapped COCO-formatted dataset

    Raises:
        AssertionError: if the input is not a dict (or a file containing one), if an output
            name has no output ID, or if an annotation uses a category that isn't mapped
        KeyError: if a mapped input category name is not present in the input data
    """

    if isinstance(input_data,str):
        assert os.path.isfile(input_data), "Can't find file {}".format(input_data)
        # JSON is UTF-8 by specification; don't depend on the platform default encoding
        with open(input_data,'r',encoding='utf-8') as f:
            output_data = json.load(f)
        assert isinstance(output_data,dict), 'Illegal COCO input data'
    else:
        assert isinstance(input_data,dict), 'Illegal COCO input data'
        # Work on a private copy, so the caller's dict is never modified
        output_data = deepcopy(input_data)

    # Read input name --> ID mapping
    input_category_name_to_input_category_id = \
        {c['name']:c['id'] for c in output_data['categories']}

    # Map input IDs --> output IDs
    input_category_id_to_output_category_id = {}
    for input_name,output_name in input_category_name_to_output_category_name.items():
        assert output_name in output_category_name_to_id, \
            'No output ID for {} --> {}'.format(input_name,output_name)
        if input_name not in input_category_name_to_input_category_id:
            # Same exception type as the bare dict lookup, but with a message that
            # says what went wrong
            raise KeyError('Input category {} is not present in the input data'.format(
                input_name))
        input_id = input_category_name_to_input_category_id[input_name]
        input_category_id_to_output_category_id[input_id] = \
            output_category_name_to_id[output_name]

    # Map annotations
    for ann in output_data['annotations']:
        assert ann['category_id'] in input_category_id_to_output_category_id, \
            'Unrecognized category ID {}'.format(ann['category_id'])
        ann['category_id'] = input_category_id_to_output_category_id[ann['category_id']]

    # Update the category list
    output_data['categories'] = \
        [{'name':name,'id':category_id} for name,category_id in output_category_name_to_id.items()]

    if output_file is not None:
        with open(output_file,'w',encoding='utf-8') as f:
            json.dump(output_data,f,indent=1)

    return output_data
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
#%% Command-line driver
|
|
83
|
+
|
|
84
|
+
# TODO
|
data_management/remove_exif.py
CHANGED
|
@@ -1,70 +1,66 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
1
|
+
"""
|
|
2
|
+
|
|
3
|
+
remove_exif.py
|
|
4
|
+
|
|
5
|
+
Removes all EXIF/IPTC/XMP metadata from a folder of images, without making
|
|
6
|
+
backup copies, using pyexiv2.
|
|
7
|
+
|
|
8
|
+
TODO: This is a one-off script waiting to be cleaned up for more general use.
|
|
9
|
+
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
input_base = r'f:\images'
|
|
13
|
+
|
|
9
14
|
|
|
10
15
|
#%% Imports and constants
|
|
11
16
|
|
|
12
17
|
import os
|
|
13
18
|
import glob
|
|
14
19
|
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
all_files = [f for f in glob.glob(input_base + "*/**", recursive=True)]
|
|
22
|
-
image_files = [s for s in all_files if (s.lower().endswith('.jpg'))]
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
import pyexiv2
|
|
28
|
-
|
|
29
|
-
# PYEXIV2 IS NOT THREAD SAFE; DO NOT CALL THIS IN PARALLEL FROM A SINGLE PROCESS
|
|
30
|
-
def remove_exif(fn):
|
|
31
|
-
|
|
32
|
-
try:
|
|
33
|
-
img = pyexiv2.Image(fn)
|
|
34
|
-
# data = img.read_exif(); print(data)
|
|
35
|
-
img.clear_exif()
|
|
36
|
-
img.clear_iptc()
|
|
37
|
-
img.clear_xmp()
|
|
38
|
-
img.close()
|
|
39
|
-
except Exception as e:
|
|
40
|
-
print('EXIF error on {}: {}'.format(fn,str(e)))
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
#%% Debug
|
|
44
|
-
|
|
45
|
-
if False:
|
|
46
|
-
#%%
|
|
47
|
-
fn = image_files[-10001]
|
|
48
|
-
os.startfile(fn)
|
|
49
|
-
#%%
|
|
50
|
-
remove_exif(fn)
|
|
51
|
-
os.startfile(fn)
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
#%% Remove EXIF data (execution)
|
|
55
|
-
|
|
56
|
-
from joblib import Parallel, delayed
|
|
57
|
-
|
|
58
|
-
n_exif_threads = 50
|
|
59
|
-
|
|
60
|
-
if n_exif_threads == 1:
|
|
61
|
-
|
|
62
|
-
# fn = image_files[0]
|
|
63
|
-
for fn in image_files:
|
|
64
|
-
remove_exif(fn)
|
|
20
|
+
def main():
    """
    Remove all EXIF/IPTC/XMP metadata, in place and without making backup copies,
    from every .jpg file found recursively under the module-level [input_base] folder.

    Files are processed with pyexiv2 in parallel worker processes (via joblib). Errors
    on individual files are printed and do not stop the run.
    """

    assert os.path.isdir(input_base)

    ##%% List files

    # Anchor the recursive pattern *inside* input_base. Appending "*/**" directly to the
    # folder name would also match sibling folders that merely share the prefix (e.g.
    # "images_backup" next to "images"), and this script rewrites files in place.
    all_files = glob.glob(os.path.join(input_base,'**'), recursive=True)
    image_files = [s for s in all_files if s.lower().endswith('.jpg')]


    ##%% Remove EXIF data (support)

    import pyexiv2

    # PYEXIV2 IS NOT THREAD SAFE; DO NOT CALL THIS IN PARALLEL FROM A SINGLE PROCESS
    def remove_exif(fn):
        """
        Clear EXIF, IPTC, and XMP metadata from the image file [fn], printing (rather
        than raising) any error.
        """

        try:
            img = pyexiv2.Image(fn)
            try:
                # data = img.read_exif(); print(data)
                img.clear_exif()
                img.clear_iptc()
                img.clear_xmp()
            finally:
                # Release the file handle even if one of the clear_* calls fails
                img.close()
        except Exception as e:
            print('EXIF error on {}: {}'.format(fn,str(e)))


    ##%% Remove EXIF data (execution)

    from joblib import Parallel, delayed

    n_exif_threads = 50

    if n_exif_threads == 1:

        # fn = image_files[0]
        for fn in image_files:
            remove_exif(fn)

    else:
        # joblib.Parallel defaults to a process-based backend, but let's be sure
        # results = Parallel(n_jobs=n_exif_threads,verbose=2,prefer='processes')(delayed(remove_exif)(fn) for fn in image_files[0:10])
        _ = Parallel(n_jobs=n_exif_threads,verbose=2,prefer='processes')(delayed(remove_exif)(fn) for fn in image_files)
|
|
65
|
+
if __name__ == '__main__':
|
|
66
|
+
main()
|