megadetector 5.0.8-py3-none-any.whl → 5.0.9-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- api/__init__.py +0 -0
- api/batch_processing/__init__.py +0 -0
- api/batch_processing/api_core/__init__.py +0 -0
- api/batch_processing/api_core/batch_service/__init__.py +0 -0
- api/batch_processing/api_core/batch_service/score.py +0 -1
- api/batch_processing/api_core/server_job_status_table.py +0 -1
- api/batch_processing/api_core_support/__init__.py +0 -0
- api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
- api/batch_processing/api_support/__init__.py +0 -0
- api/batch_processing/api_support/summarize_daily_activity.py +0 -1
- api/batch_processing/data_preparation/__init__.py +0 -0
- api/batch_processing/data_preparation/manage_local_batch.py +65 -65
- api/batch_processing/data_preparation/manage_video_batch.py +8 -8
- api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
- api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
- api/batch_processing/postprocessing/__init__.py +0 -0
- api/batch_processing/postprocessing/add_max_conf.py +12 -12
- api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
- api/batch_processing/postprocessing/combine_api_outputs.py +68 -54
- api/batch_processing/postprocessing/compare_batch_results.py +113 -43
- api/batch_processing/postprocessing/convert_output_format.py +41 -16
- api/batch_processing/postprocessing/load_api_results.py +16 -17
- api/batch_processing/postprocessing/md_to_coco.py +31 -21
- api/batch_processing/postprocessing/md_to_labelme.py +52 -22
- api/batch_processing/postprocessing/merge_detections.py +14 -14
- api/batch_processing/postprocessing/postprocess_batch_results.py +246 -174
- api/batch_processing/postprocessing/remap_detection_categories.py +32 -25
- api/batch_processing/postprocessing/render_detection_confusion_matrix.py +60 -27
- api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
- api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
- api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +242 -158
- api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
- api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
- api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
- api/synchronous/__init__.py +0 -0
- api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
- api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
- api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
- api/synchronous/api_core/animal_detection_api/config.py +35 -35
- api/synchronous/api_core/tests/__init__.py +0 -0
- api/synchronous/api_core/tests/load_test.py +109 -109
- classification/__init__.py +0 -0
- classification/aggregate_classifier_probs.py +21 -24
- classification/analyze_failed_images.py +11 -13
- classification/cache_batchapi_outputs.py +51 -51
- classification/create_classification_dataset.py +69 -68
- classification/crop_detections.py +54 -53
- classification/csv_to_json.py +97 -100
- classification/detect_and_crop.py +105 -105
- classification/evaluate_model.py +43 -42
- classification/identify_mislabeled_candidates.py +47 -46
- classification/json_to_azcopy_list.py +10 -10
- classification/json_validator.py +72 -71
- classification/map_classification_categories.py +44 -43
- classification/merge_classification_detection_output.py +68 -68
- classification/prepare_classification_script.py +157 -154
- classification/prepare_classification_script_mc.py +228 -228
- classification/run_classifier.py +27 -26
- classification/save_mislabeled.py +30 -30
- classification/train_classifier.py +20 -20
- classification/train_classifier_tf.py +21 -22
- classification/train_utils.py +10 -10
- data_management/__init__.py +0 -0
- data_management/annotations/__init__.py +0 -0
- data_management/annotations/annotation_constants.py +18 -31
- data_management/camtrap_dp_to_coco.py +238 -0
- data_management/cct_json_utils.py +102 -59
- data_management/cct_to_md.py +176 -158
- data_management/cct_to_wi.py +247 -219
- data_management/coco_to_labelme.py +272 -263
- data_management/coco_to_yolo.py +79 -58
- data_management/databases/__init__.py +0 -0
- data_management/databases/add_width_and_height_to_db.py +20 -16
- data_management/databases/combine_coco_camera_traps_files.py +35 -31
- data_management/databases/integrity_check_json_db.py +62 -24
- data_management/databases/subset_json_db.py +24 -15
- data_management/generate_crops_from_cct.py +27 -45
- data_management/get_image_sizes.py +188 -162
- data_management/importers/add_nacti_sizes.py +8 -8
- data_management/importers/add_timestamps_to_icct.py +78 -78
- data_management/importers/animl_results_to_md_results.py +158 -158
- data_management/importers/auckland_doc_test_to_json.py +9 -9
- data_management/importers/auckland_doc_to_json.py +8 -8
- data_management/importers/awc_to_json.py +7 -7
- data_management/importers/bellevue_to_json.py +15 -15
- data_management/importers/cacophony-thermal-importer.py +13 -13
- data_management/importers/carrizo_shrubfree_2018.py +8 -8
- data_management/importers/carrizo_trail_cam_2017.py +8 -8
- data_management/importers/cct_field_adjustments.py +9 -9
- data_management/importers/channel_islands_to_cct.py +10 -10
- data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
- data_management/importers/ena24_to_json.py +7 -7
- data_management/importers/filenames_to_json.py +8 -8
- data_management/importers/helena_to_cct.py +7 -7
- data_management/importers/idaho-camera-traps.py +7 -7
- data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
- data_management/importers/jb_csv_to_json.py +9 -9
- data_management/importers/mcgill_to_json.py +8 -8
- data_management/importers/missouri_to_json.py +18 -18
- data_management/importers/nacti_fieldname_adjustments.py +10 -10
- data_management/importers/noaa_seals_2019.py +7 -7
- data_management/importers/pc_to_json.py +7 -7
- data_management/importers/plot_wni_giraffes.py +7 -7
- data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
- data_management/importers/prepare_zsl_imerit.py +7 -7
- data_management/importers/rspb_to_json.py +8 -8
- data_management/importers/save_the_elephants_survey_A.py +8 -8
- data_management/importers/save_the_elephants_survey_B.py +9 -9
- data_management/importers/snapshot_safari_importer.py +26 -26
- data_management/importers/snapshot_safari_importer_reprise.py +665 -665
- data_management/importers/snapshot_serengeti_lila.py +14 -14
- data_management/importers/sulross_get_exif.py +8 -9
- data_management/importers/timelapse_csv_set_to_json.py +11 -11
- data_management/importers/ubc_to_json.py +13 -13
- data_management/importers/umn_to_json.py +7 -7
- data_management/importers/wellington_to_json.py +8 -8
- data_management/importers/wi_to_json.py +9 -9
- data_management/importers/zamba_results_to_md_results.py +181 -181
- data_management/labelme_to_coco.py +65 -24
- data_management/labelme_to_yolo.py +8 -8
- data_management/lila/__init__.py +0 -0
- data_management/lila/add_locations_to_island_camera_traps.py +9 -9
- data_management/lila/add_locations_to_nacti.py +147 -147
- data_management/lila/create_lila_blank_set.py +13 -13
- data_management/lila/create_lila_test_set.py +8 -8
- data_management/lila/create_links_to_md_results_files.py +106 -106
- data_management/lila/download_lila_subset.py +44 -110
- data_management/lila/generate_lila_per_image_labels.py +55 -42
- data_management/lila/get_lila_annotation_counts.py +18 -15
- data_management/lila/get_lila_image_counts.py +11 -11
- data_management/lila/lila_common.py +96 -33
- data_management/lila/test_lila_metadata_urls.py +132 -116
- data_management/ocr_tools.py +173 -128
- data_management/read_exif.py +110 -97
- data_management/remap_coco_categories.py +83 -83
- data_management/remove_exif.py +58 -62
- data_management/resize_coco_dataset.py +30 -23
- data_management/wi_download_csv_to_coco.py +246 -239
- data_management/yolo_output_to_md_output.py +86 -73
- data_management/yolo_to_coco.py +300 -60
- detection/__init__.py +0 -0
- detection/detector_training/__init__.py +0 -0
- detection/process_video.py +85 -33
- detection/pytorch_detector.py +43 -25
- detection/run_detector.py +157 -72
- detection/run_detector_batch.py +179 -113
- detection/run_inference_with_yolov5_val.py +108 -48
- detection/run_tiled_inference.py +111 -40
- detection/tf_detector.py +51 -29
- detection/video_utils.py +606 -521
- docs/source/conf.py +43 -0
- md_utils/__init__.py +0 -0
- md_utils/azure_utils.py +9 -9
- md_utils/ct_utils.py +228 -68
- md_utils/directory_listing.py +59 -64
- md_utils/md_tests.py +968 -871
- md_utils/path_utils.py +460 -134
- md_utils/process_utils.py +157 -133
- md_utils/sas_blob_utils.py +20 -20
- md_utils/split_locations_into_train_val.py +45 -32
- md_utils/string_utils.py +33 -10
- md_utils/url_utils.py +176 -60
- md_utils/write_html_image_list.py +40 -33
- md_visualization/__init__.py +0 -0
- md_visualization/plot_utils.py +102 -109
- md_visualization/render_images_with_thumbnails.py +34 -34
- md_visualization/visualization_utils.py +597 -291
- md_visualization/visualize_db.py +76 -48
- md_visualization/visualize_detector_output.py +61 -42
- {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/METADATA +13 -7
- megadetector-5.0.9.dist-info/RECORD +224 -0
- {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
- taxonomy_mapping/__init__.py +0 -0
- taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
- taxonomy_mapping/map_new_lila_datasets.py +154 -154
- taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
- taxonomy_mapping/preview_lila_taxonomy.py +591 -591
- taxonomy_mapping/retrieve_sample_image.py +12 -12
- taxonomy_mapping/simple_image_download.py +11 -11
- taxonomy_mapping/species_lookup.py +10 -10
- taxonomy_mapping/taxonomy_csv_checker.py +18 -18
- taxonomy_mapping/taxonomy_graph.py +47 -47
- taxonomy_mapping/validate_lila_category_mappings.py +83 -76
- data_management/cct_json_to_filename_json.py +0 -89
- data_management/cct_to_csv.py +0 -140
- data_management/databases/remove_corrupted_images_from_db.py +0 -191
- detection/detector_training/copy_checkpoints.py +0 -43
- megadetector-5.0.8.dist-info/RECORD +0 -205
- {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0
- {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/WHEEL +0 -0
data_management/read_exif.py
CHANGED
@@ -1,15 +1,15 @@
-
-
-
-
-
-
-
-
-
-
-
-
+"""
+
+read_exif.py
+
+Given a folder of images, reads relevant metadata (EXIF/IPTC/XMP) fields from all images,
+and writes them to a .json or .csv file.
+
+This module can use either PIL (which can only reliably read EXIF data) or exiftool (which
+can read everything). The latter approach expects that exiftool is available on the system
+path. No attempt is made to be consistent in format across the two approaches.
+
+"""

 #%% Imports and constants

@@ -24,7 +24,7 @@ from multiprocessing.pool import Pool as Pool
 from tqdm import tqdm
 from PIL import Image, ExifTags

-from md_utils.path_utils import find_images
+from md_utils.path_utils import find_images, is_executable
 from md_utils.ct_utils import args_to_object

 debug_max_images = None
@@ -33,64 +33,61 @@ debug_max_images = None
 #%% Options

 class ReadExifOptions:
+    """
+    Parameters controlling metadata extraction.
+    """

+    #: Enable additional debug console output
     verbose = False

-
-
-
+    #: If this is True and an output file is specified for read_exif_from_folder,
+    #: and we encounter a serialization issue, we'll return the results but won't
+    #: error.
     allow_write_error = False

-
+    #: Number of concurrent workers, set to <= 1 to disable parallelization
     n_workers = 1

-
-
-
+    #: Should we use threads (vs. processes) for parallelization?
+    #:
+    #: Not relevant if n_workers is <= 1.
     use_threads = True

-
-
+    #: "File" and "ExifTool" are tag types used by ExifTool to report data that
+    #: doesn't come from EXIF, rather from the file (e.g. file size).
     tag_types_to_ignore = set(['File','ExifTool'])

-
+    #: Include/exclude specific tags (tags_to_include and tags_to_exclude are mutually incompatible)
+    #:
+    #: A useful set of tags one might want to limit queries for:
+    #:
+    #: options.tags_to_include = ['DateTime','Model','Make','ExifImageWidth','ExifImageHeight','DateTime',
+    #:    'DateTimeOriginal','Orientation']
     tags_to_include = None
-    tags_to_exclude = None

-
-
+    #: Include/exclude specific tags (tags_to_include and tags_to_exclude are mutually incompatible)
+    tags_to_exclude = None

+    #: The command line to invoke if using exiftool, can be an absolute path to exiftool.exe, or
+    #: can be just "exiftool", in which case it should be on your system path.
     exiftool_command_name = 'exiftool'

-
-
-
-
-
+    #: How should we handle byte-formatted EXIF tags?
+    #:
+    #: 'convert_to_string': convert to a Python string
+    #: 'delete': don't include at all
+    #: 'raw': include as a byte string
     byte_handling = 'convert_to_string' # 'convert_to_string','delete','raw'

-
+    #: Should we use exiftool or PIL?
     processing_library = 'pil' # 'exiftool','pil'
-
-
+

 #%% Functions

-def
-    """
-    Enumerates all image files in input_folder, returning relative paths
-    """
-
-    image_files = find_images(input_folder,recursive=recursive)
-    image_files = [os.path.relpath(s,input_folder) for s in image_files]
-    image_files = [s.replace('\\','/') for s in image_files]
-    print('Enumerated {} files'.format(len(image_files)))
-    return image_files
-
-
-def get_exif_ifd(exif):
+def _get_exif_ifd(exif):
     """
-    Read EXIF data by finding the EXIF offset and reading tags directly
+    Read EXIF data from by finding the EXIF offset and reading tags directly

     https://github.com/python-pillow/Pillow/issues/5863
     """
@@ -108,8 +105,16 @@ def get_exif_ifd(exif):

 def read_pil_exif(im,options=None):
     """
-    Read all the EXIF data we know how to read from
-
+    Read all the EXIF data we know how to read from an image, using PIL. This is primarily
+    an internal function; the main entry point for single-image EXIF information is
+    read_exif_tags_for_image().
+
+    Args:
+        im (str or PIL.Image.Image): image (as a filename or an Image object) from which
+            we should read EXIF data.
+
+    Returns:
+        dict: a dictionary mapping EXIF tag names to their values
     """

     if options is None:
@@ -138,10 +143,10 @@ def read_pil_exif(im,options=None):
                # print('Warning: unrecognized EXIF tag: {}'.format(k))
                exif_tags[k] = str(v)

-
+    exif_ifd_tags = _get_exif_ifd(exif_info)

-    for k in
-        v =
+    for k in exif_ifd_tags.keys():
+        v = exif_ifd_tags[k]
         if k in exif_tags:
             if options.verbose:
                 print('Warning: redundant EXIF values for {} in {}:\n{}\n{}'.format(
@@ -177,8 +182,8 @@ def read_pil_exif(im,options=None):

 def format_datetime_as_exif_datetime_string(dt):
     """
-    Returns a Python datetime object rendered using the standard
-    string format
+    Returns a Python datetime object rendered using the standard EXIF datetime
+    string format ('%Y:%m:%d %H:%M:%S')
     """

     return datetime.strftime(dt, '%Y:%m:%d %H:%M:%S')
@@ -190,7 +195,14 @@ def parse_exif_datetime_string(s,verbose=False):

     %Y:%m:%d %H:%M:%S

-
+    Parses one of those strings into a Python datetime object.
+
+    Args:
+        s (str): datetime string to parse, should be in standard EXIF datetime format
+        verbose (bool, optional): enable additional debug output
+
+    Returns:
+        datetime: the datetime object created from [s]
     """

     dt = None
@@ -232,13 +244,13 @@ def read_exif_tags_for_image(file_path,options=None):
     """
     Get relevant fields from EXIF data for an image

-    Returns
-
-
-
-
-
-
+    Returns:
+        dict: a dict with fields 'status' (str) and 'tags'. The exact format of 'tags' depends on
+        options (ReadExifOptions, optional): parameters controlling metadata extraction
+        options.processing_library:
+
+        - For exiftool, 'tags' is a list of lists, where each element is (type/tag/value)
+        - For PIL, 'tags' is a dict (str:str)
     """

     if options is None:
@@ -344,7 +356,7 @@ def read_exif_tags_for_image(file_path,options=None):
 # ...read_exif_tags_for_image()


-def populate_exif_data(im, image_base, options=None):
+def _populate_exif_data(im, image_base, options=None):
     """
     Populate EXIF data into the 'exif_tags' field in the image object [im].

@@ -386,10 +398,10 @@ def populate_exif_data(im, image_base, options=None):

     return im

-# ...populate_exif_data()
+# ..._populate_exif_data()


-def create_image_objects(image_files,recursive=True):
+def _create_image_objects(image_files,recursive=True):
     """
     Create empty image objects for every image in [image_files], which can be a
     list of relative paths (which will get stored without processing, so the base
@@ -404,7 +416,10 @@ def create_image_objects(image_files,recursive=True):
     if isinstance(image_files,str):
         print('Enumerating image files in {}'.format(image_files))
         assert os.path.isdir(image_files), 'Invalid image folder {}'.format(image_files)
-        image_files =
+        image_files = find_images(image_files,
+                                  recursive=recursive,
+                                  return_relative_paths=True,
+                                  convert_slashes=True)

     images = []
     for fn in image_files:
@@ -419,7 +434,7 @@ def create_image_objects(image_files,recursive=True):
     return images


-def populate_exif_for_images(image_base,images,options=None):
+def _populate_exif_for_images(image_base,images,options=None):
     """
     Main worker loop: read EXIF data for each image object in [images] and
     populate the image objects.
@@ -435,7 +450,7 @@ def populate_exif_for_images(image_base,images,options=None):

         results = []
         for im in tqdm(images):
-            results.append(populate_exif_data(im,image_base,options))
+            results.append(_populate_exif_data(im,image_base,options))

     else:

@@ -447,13 +462,13 @@ def populate_exif_for_images(image_base,images,options=None):
         print('Starting parallel process pool with {} workers'.format(options.n_workers))
         pool = Pool(options.n_workers)

-        results = list(tqdm(pool.imap(partial(populate_exif_data,image_base=image_base,
+        results = list(tqdm(pool.imap(partial(_populate_exif_data,image_base=image_base,
             options=options),images),total=len(images)))

     return results


-def write_exif_results(results,output_file):
+def _write_exif_results(results,output_file):
     """
     Write EXIF information to [output_file].

@@ -530,28 +545,24 @@ def write_exif_results(results,output_file):
     print('Wrote results to {}'.format(output_file))


-def is_executable(name):
-
-    """Check whether `name` is on PATH and marked as executable."""
-
-    # https://stackoverflow.com/questions/11210104/check-if-a-program-exists-from-a-python-script
-
-    from shutil import which
-    return which(name) is not None
-
-
 def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=None,recursive=True):
     """
-    Read EXIF data for
-
-
-
-
-
-
-
-
+    Read EXIF data for a folder of images.
+
+    Args:
+        input_folder (str): folder to process; if this is None, [filenames] should be a list of absolute
+            paths
+        output_file (str, optional): .json file to which we should write results; if this is None, results
+            are returned but not written to disk
+        options (ReadExifOptions, optional): parameters controlling metadata extraction
+        filenames (list, optional): allowlist of relative filenames (if [input_folder] is not None) or
+            a list of absolute filenames (if [input_folder] is None)
+        recursive (bool, optional): whether to recurse into [input_folder], not relevant if [input_folder]
+            is None.
+
+    Returns:
+        dict: a dictionary mapping relative filenames to EXIF data, whose format depends on whether
+        we're using PIL or exiftool.
     """

     if options is None:
@@ -589,16 +600,16 @@ def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=N
         assert is_executable(options.exiftool_command_name), 'exiftool not available'

     if filenames is None:
-        images =
+        images = _create_image_objects(input_folder,recursive=recursive)
     else:
         assert isinstance(filenames,list)
-        images =
+        images = _create_image_objects(filenames)

-    results =
+    results = _populate_exif_for_images(input_folder,images,options)

     if output_file is not None:
         try:
-
+            _write_exif_results(results,output_file)
         except Exception as e:
             if not options.allow_write_error:
                 raise
@@ -645,8 +656,10 @@ def main():
     parser = argparse.ArgumentParser(description=('Read EXIF information from all images in' + \
                                      ' a folder, and write the results to .csv or .json'))

-    parser.add_argument('input_folder', type=str
-
+    parser.add_argument('input_folder', type=str,
+                        help='Folder of images from which we should read EXIF information')
+    parser.add_argument('output_file', type=str,
+                        help='Output file (.json) to which we should write EXIF information')
     parser.add_argument('--n_workers', type=int, default=1,
                         help='Number of concurrent workers to use (defaults to 1)')
     parser.add_argument('--use_threads', action='store_true',
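Taken together, the read_exif.py changes rename the internal helpers with leading underscores and leave read_exif_from_folder() as the documented entry point. A minimal usage sketch, based only on the signature and docstring shown above; the folder name, output path, and tag allowlist are hypothetical, and the import path assumes the wheel's top-level data_management package:

from data_management.read_exif import ReadExifOptions, read_exif_from_folder

options = ReadExifOptions()
options.processing_library = 'pil'  # or 'exiftool', if exiftool is on your PATH
options.n_workers = 4               # <= 1 disables parallelization
# Hypothetical allowlist; see the tags_to_include comment in the diff above
options.tags_to_include = ['DateTime','Model','Make','Orientation']

# Per the new docstring, returns a dict mapping relative filenames to EXIF
# data, and (because output_file is not None) also writes it to disk
results = read_exif_from_folder('my_images', output_file='exif.json', options=options)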
data_management/remap_coco_categories.py
CHANGED

@@ -1,84 +1,84 @@
-
-
-
-
-
-
-
-
+"""
+
+remap_coco_categories.py
+
+Given a COCO-formatted dataset, remap the categories to a new mapping.
+
+"""
+
 #%% Imports and constants

 import os
 import json

 from copy import deepcopy


 #%% Main function

 def remap_coco_categories(input_data,
                           output_category_name_to_id,
                           input_category_name_to_output_category_name,
                           output_file=None):
     """
     Given a COCO-formatted dataset, remap the categories to a new categories mapping, optionally
     writing the results to a new file.

     output_category_name_to_id is a dict mapping strings to ints.

     input_category_name_to_output_category_name is a dict mapping strings to strings.

     [input_data] can be a COCO-formatted dict or a filename. If it's a dict, it will be copied,
     not modified in place.
     """

     if isinstance(input_data,str):
         assert os.path.isfile(input_data), "Can't find file {}".format(input_data)
         with open(input_data,'r') as f:
             input_data = json.load(f)
         assert isinstance(input_data,dict), 'Illegal COCO input data'
     else:
         assert isinstance(input_data,dict), 'Illegal COCO input data'
         input_data = deepcopy(input_data)

     # It's safe to modify in-place now
     output_data = input_data

     # Read input name --> ID mapping
     input_category_name_to_input_category_id = {}
     for c in input_data['categories']:
         input_category_name_to_input_category_id[c['name']] = c['id']

     # Map input IDs --> output IDs
     input_category_id_to_output_category_id = {}
     for input_name in input_category_name_to_output_category_name.keys():
         output_name = input_category_name_to_output_category_name[input_name]
         assert output_name in output_category_name_to_id, \
             'No output ID for {} --> {}'.format(input_name,output_name)
         input_id = input_category_name_to_input_category_id[input_name]
         output_id = output_category_name_to_id[output_name]
         input_category_id_to_output_category_id[input_id] = output_id

     # Map annotations
     for ann in output_data['annotations']:
         assert ann['category_id'] in input_category_id_to_output_category_id, \
             'Unrecognized category ID {}'.format(ann['category_id'])
         ann['category_id'] = input_category_id_to_output_category_id[ann['category_id']]

     # Update the category list
     output_categories = []
     for output_name in output_category_name_to_id:
         category = {'name':output_name,'id':output_category_name_to_id[output_name]}
         output_categories.append(category)
     output_data['categories'] = output_categories

     if output_file is not None:
         with open(output_file,'w') as f:
             json.dump(output_data,f,indent=1)

     return input_data


 #%% Command-line driver

 # TODO
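Since only the header comment of remap_coco_categories.py changed (the module docstring replaced the old file header), existing callers are unaffected. For reference, a small worked example of the mapping semantics in the function shown above; the dataset, category names, and IDs are invented:

from data_management.remap_coco_categories import remap_coco_categories

# A toy COCO dict with two input categories
coco = {
    'images': [],
    'annotations': [{'category_id': 1}, {'category_id': 2}],
    'categories': [{'id': 1, 'name': 'puma'}, {'id': 2, 'name': 'bobcat'}]
}

# Collapse both input categories into a single output category
output_category_name_to_id = {'cat': 0}
input_name_to_output_name = {'puma': 'cat', 'bobcat': 'cat'}

remapped = remap_coco_categories(coco, output_category_name_to_id,
                                 input_name_to_output_name)

# Both annotations now have category_id == 0, and remapped['categories'] is
# [{'name': 'cat', 'id': 0}]; the input dict was deep-copied, not modified.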
data_management/remove_exif.py
CHANGED
@@ -1,70 +1,66 @@
-
-
-
-
-
-
-
-
+"""
+
+remove_exif.py
+
+Removes all EXIF/IPTC/XMP metadata from a folder of images, without making
+backup copies, using pyexiv2.
+
+TODO: This is a one-off script waiting to be cleaned up for more general use.
+
+"""
+
+input_base = r'f:\images'
+

 #%% Imports and constants

 import os
 import glob

-
-
-
-
-
-
-all_files = [f for f in glob.glob(input_base + "*/**", recursive=True)]
-image_files = [s for s in all_files if (s.lower().endswith('.jpg'))]
-
-
-
-
-import pyexiv2
-
-# PYEXIV2 IS NOT THREAD SAFE; DO NOT CALL THIS IN PARALLEL FROM A SINGLE PROCESS
-def remove_exif(fn):
-
-    try:
-        img = pyexiv2.Image(fn)
-        # data = img.read_exif(); print(data)
-        img.clear_exif()
-        img.clear_iptc()
-        img.clear_xmp()
-        img.close()
-    except Exception as e:
-        print('EXIF error on {}: {}'.format(fn,str(e)))
-
-
-#%% Debug
-
-if False:
-    #%%
-    fn = image_files[-10001]
-    os.startfile(fn)
-    #%%
-    remove_exif(fn)
-    os.startfile(fn)
-
-
-#%% Remove EXIF data (execution)
-
-from joblib import Parallel, delayed
-
-n_exif_threads = 50
-
-if n_exif_threads == 1:
-
-    # fn = image_files[0]
-    for fn in image_files:
-        remove_exif(fn)
+def main():
+
+    assert os.path.isdir(input_base)
+
+    ##%% List files
+
+    all_files = [f for f in glob.glob(input_base + "*/**", recursive=True)]
+    image_files = [s for s in all_files if (s.lower().endswith('.jpg'))]
+
+
+    ##%% Remove EXIF data (support)
+
+    import pyexiv2
+
+    # PYEXIV2 IS NOT THREAD SAFE; DO NOT CALL THIS IN PARALLEL FROM A SINGLE PROCESS
+    def remove_exif(fn):

-
-
-
-
+        try:
+            img = pyexiv2.Image(fn)
+            # data = img.read_exif(); print(data)
+            img.clear_exif()
+            img.clear_iptc()
+            img.clear_xmp()
+            img.close()
+        except Exception as e:
+            print('EXIF error on {}: {}'.format(fn,str(e)))
+
+
+    ##%% Remove EXIF data (execution)
+
+    from joblib import Parallel, delayed

+    n_exif_threads = 50
+
+    if n_exif_threads == 1:
+
+        # fn = image_files[0]
+        for fn in image_files:
+            remove_exif(fn)
+
+    else:
+        # joblib.Parallel defaults to a process-based backend, but let's be sure
+        # results = Parallel(n_jobs=n_exif_threads,verbose=2,prefer='processes')(delayed(remove_exif)(fn) for fn in image_files[0:10])
+        _ = Parallel(n_jobs=n_exif_threads,verbose=2,prefer='processes')(delayed(remove_exif)(fn) for fn in image_files)
+
+if __name__ == '__main__':
+    main()