megadetector-5.0.12-py3-none-any.whl → megadetector-5.0.13-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- megadetector/api/batch_processing/api_core/server.py +1 -1
- megadetector/api/batch_processing/api_core/server_api_config.py +0 -1
- megadetector/api/batch_processing/api_core/server_job_status_table.py +0 -3
- megadetector/api/batch_processing/api_core/server_utils.py +0 -4
- megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
- megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -3
- megadetector/classification/efficientnet/utils.py +0 -3
- megadetector/data_management/camtrap_dp_to_coco.py +0 -2
- megadetector/data_management/cct_json_utils.py +15 -6
- megadetector/data_management/coco_to_labelme.py +12 -1
- megadetector/data_management/databases/integrity_check_json_db.py +43 -27
- megadetector/data_management/importers/cacophony-thermal-importer.py +1 -4
- megadetector/data_management/ocr_tools.py +0 -4
- megadetector/data_management/read_exif.py +171 -43
- megadetector/data_management/rename_images.py +187 -0
- megadetector/data_management/wi_download_csv_to_coco.py +3 -2
- megadetector/data_management/yolo_output_to_md_output.py +7 -2
- megadetector/detection/process_video.py +360 -216
- megadetector/detection/pytorch_detector.py +17 -3
- megadetector/detection/run_inference_with_yolov5_val.py +527 -357
- megadetector/detection/tf_detector.py +3 -0
- megadetector/detection/video_utils.py +122 -30
- megadetector/postprocessing/categorize_detections_by_size.py +16 -14
- megadetector/postprocessing/classification_postprocessing.py +716 -0
- megadetector/postprocessing/compare_batch_results.py +101 -93
- megadetector/postprocessing/merge_detections.py +18 -7
- megadetector/postprocessing/postprocess_batch_results.py +133 -127
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +236 -232
- megadetector/postprocessing/subset_json_detector_output.py +66 -62
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +0 -2
- megadetector/utils/ct_utils.py +5 -4
- megadetector/utils/md_tests.py +311 -115
- megadetector/utils/path_utils.py +1 -0
- megadetector/utils/process_utils.py +6 -3
- megadetector/visualization/visualize_db.py +79 -77
- {megadetector-5.0.12.dist-info → megadetector-5.0.13.dist-info}/LICENSE +0 -0
- {megadetector-5.0.12.dist-info → megadetector-5.0.13.dist-info}/METADATA +2 -2
- {megadetector-5.0.12.dist-info → megadetector-5.0.13.dist-info}/RECORD +40 -38
- {megadetector-5.0.12.dist-info → megadetector-5.0.13.dist-info}/top_level.txt +0 -0
- {megadetector-5.0.12.dist-info → megadetector-5.0.13.dist-info}/WHEEL +0 -0

megadetector/api/batch_processing/api_core/server.py

@@ -105,7 +105,7 @@ def request_detections():
     model_version = post_body.get('model_version', '')
     if model_version != '':
         model_version = str(model_version)  # in case user used an int
-        if model_version not in api_config.MD_VERSIONS_TO_REL_PATH:
+        if model_version not in api_config.MD_VERSIONS_TO_REL_PATH:
             return make_error(400, f'model_version {model_version} is not supported.')
 
     # check request_name has only allowed characters

megadetector/api/batch_processing/api_core/server_api_config.py

@@ -47,7 +47,6 @@ MAX_BATCH_ACCOUNT_ACTIVE_JOBS = 300
 DETECTION_CONF_THRESHOLD = 0.1
 
 # relative to the `megadetector_copies` folder in the container `models`
-# TODO add MD versions info to AppConfig
 MD_VERSIONS_TO_REL_PATH = {
     '4.1': 'megadetector_v4_1/md_v4.1.0.pb',
     '3': 'megadetector_v3/megadetector_v3_tf19.pb',

megadetector/api/batch_processing/api_core/server_job_status_table.py

@@ -67,9 +67,6 @@ class JobStatusTable:
         assert 'request_status' in status and 'message' in status
         assert status['request_status'] in JobStatusTable.allowed_statuses
 
-        # TODO do not read the entry first to get the call_params when the Cosmos SDK add a
-        # patching functionality:
-        # https://feedback.azure.com/forums/263030-azure-cosmos-db/suggestions/6693091-be-able-to-do-partial-updates-on-document
         item_old = self.read_job_status(job_id)
         if item_old is None:
             raise ValueError

megadetector/api/batch_processing/api_core/server_utils.py

@@ -19,7 +19,6 @@ log = logging.getLogger(os.environ['FLASK_APP'])
 #%% helper classes and functions
 
 def make_error(error_code: int, error_message: str) -> Tuple[dict, int]:
-    # TODO log exception when we have more telemetry
     log.error(f'Error {error_code} - {error_message}')
     return {'error': error_message}, error_code
 

@@ -28,8 +27,6 @@ def check_data_container_sas(input_container_sas: str) -> Optional[Tuple[int, str]]:
     """
     Returns a tuple (error_code, msg) if not a usable SAS URL, else returns None
     """
-    # TODO check that the expiry date of input_container_sas is at least a month
-    # into the future
     permissions = sas_blob_utils.get_permissions_from_uri(input_container_sas)
     data = sas_blob_utils.get_all_query_parts(input_container_sas)
 

@@ -38,7 +35,6 @@ def check_data_container_sas(input_container_sas: str) -> Optional[Tuple[int, str]]:
     if 'read' not in permissions or 'list' not in permissions:
         if 'si' in data:
             # if no permission specified explicitly but has an access policy, assumes okay
-            # TODO - check based on access policy as well
             return None
 
     return 400, msg

megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py

@@ -147,8 +147,6 @@ def detect_sync():
 
     try:
         # Write images to temporary files
-        #
-        # TODO: read from memory rather than using intermediate files
         os.makedirs(temp_direc,exist_ok=True)
         for name, file in request.files.items():
             if file.content_type in config.IMAGE_CONTENT_TYPES:

@@ -166,7 +164,6 @@ def detect_sync():
 
     while True:
 
-        # TODO: convert to a blocking read and eliminate the sleep() statement in this loop
         result = db.get(redis_id)
 
         if result:

megadetector/classification/efficientnet/utils.py

@@ -90,9 +90,6 @@ def round_filters(filters, global_params):
     multiplier = global_params.width_coefficient
     if not multiplier:
         return filters
-    # TODO: modify the params names.
-    # maybe the names (width_divisor,min_width)
-    # are more suitable than (depth_divisor,min_depth).
     divisor = global_params.depth_divisor
     min_depth = global_params.min_depth
     filters *= multiplier

megadetector/data_management/cct_json_utils.py

@@ -295,8 +295,9 @@ class SequenceOptions:
     Options parameterizing the grouping of images into sequences by time.
     """
 
-
-
+    def __init__(self):
+        #: Images separated by <= this duration will be grouped into the same sequence.
+        self.episode_interval_seconds = 60.0
 
 
 #%% Functions

@@ -305,16 +306,24 @@ def create_sequences(image_info,options=None):
     """
     Synthesizes episodes/sequences/bursts for the images in [image_info].
 
-    Modifies [image_info], populating the 'seq_id', 'seq_num_frames', and 'frame_num'
-    for each image.
+    Modifies [image_info] in place, populating the 'seq_id', 'seq_num_frames', and 'frame_num'
+    fields for each image.
 
     Args:
-        image_info (dict): a
-        '
+        image_info (str, dict, or list): a dict in CCT format, a CCT .json file, or just the 'images' component
+            of a CCT dataset (a list of dicts with fields 'file_name' (str), 'datetime' (datetime), and
+            'location' (str)).
     """
 
     if options is None:
         options = SequenceOptions()
+
+    if isinstance(image_info,str):
+        with open(image_info,'r') as f:
+            image_info = json.load(f)
+
+    if isinstance(image_info,dict):
+        image_info = image_info['images']
 
     # Find all unique locations
     locations = set()
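
With this change, create_sequences() accepts a CCT .json path, a loaded CCT dict, or a bare image list. A minimal usage sketch; the filenames and timestamps below are illustrative, not from the package:

    from datetime import datetime

    from megadetector.data_management.cct_json_utils import SequenceOptions, create_sequences

    # A bare 'images' list; each dict needs 'file_name', 'datetime', and 'location'
    images = [
        {'file_name': 'cam01/img0001.jpg', 'datetime': datetime(2023, 6, 1, 10, 0, 0), 'location': 'cam01'},
        {'file_name': 'cam01/img0002.jpg', 'datetime': datetime(2023, 6, 1, 10, 0, 30), 'location': 'cam01'},
        {'file_name': 'cam01/img0003.jpg', 'datetime': datetime(2023, 6, 1, 14, 0, 0), 'location': 'cam01'},
    ]

    options = SequenceOptions()
    options.episode_interval_seconds = 60.0  # the default: images <= 60s apart share a sequence

    # Modifies [images] in place, adding 'seq_id', 'seq_num_frames', and 'frame_num'
    create_sequences(images, options=options)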

megadetector/data_management/coco_to_labelme.py

@@ -95,7 +95,18 @@ def get_labelme_dict_for_image_from_coco_record(im,annotations,categories,info=None):
 def coco_to_labelme(coco_data,image_base,overwrite=False,bypass_image_size_check=False,verbose=False):
     """
     For all the images in [coco_data] (a dict or a filename), write a .json file in
-    labelme format alongside the corresponding relative path within image_base.
+    labelme format alongside the corresponding relative path within image_base.
+
+    Args:
+        coco_data (dict or str): path to a COCO-formatted .json file, or an already-loaded
+            COCO-formatted dict
+        image_base (str): path where images live (filenames in [coco_data] should be relative to
+            [image_base]); this is also where labelme files will be written
+        overwrite (bool, optional): overwrite existing .json files
+        bypass_image_size_check (bool, optional): if you're sure that the COCO data already has
+            correct 'width' and 'height' fields, this bypasses the somewhat-slow loading of
+            each image to fetch image sizes
+        verbose (bool, optional): enable additional debug output
     """
 
     # Load COCO data if necessary
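
A usage sketch for the newly-documented parameters; the dataset and folder paths are placeholders:

    from megadetector.data_management.coco_to_labelme import coco_to_labelme

    # Writes a labelme-format .json next to each image under image_base
    coco_to_labelme(coco_data='my_coco_dataset.json',
                    image_base='/data/images',
                    overwrite=False,
                    bypass_image_size_check=False,
                    verbose=True)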

megadetector/data_management/databases/integrity_check_json_db.py

@@ -37,30 +37,34 @@ class IntegrityCheckOptions:
     Options for integrity_check_json_db()
     """
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    def __init__(self):
+
+        #: Image path; the filenames in the .json file should be relative to this folder
+        self.baseDir = ''
+
+        #: Should we validate the image sizes?
+        self.bCheckImageSizes = False
+
+        #: Should we check that all the images in the .json file exist on disk?
+        self.bCheckImageExistence = False
+
+        #: Should we search [baseDir] for images that are not used in the .json file?
+        self.bFindUnusedImages = False
+
+        #: Should we require that all images in the .json file have a 'location' field?
+        self.bRequireLocation = True
+
+        #: For debugging, limit the number of images we'll process
+        self.iMaxNumImages = -1
+
+        #: Number of threads to use for parallelization, set to <= 1 to disable parallelization
+        self.nThreads = 10
+
+        #: Enable additional debug output
+        self.verbose = True
+
+        #: Allow integer-valued image and annotation IDs (COCO uses this, CCT files use strings)
+        self.allowIntIDs = False
 
 # This is used in a medium-hacky way to share modified options across threads
 defaultOptions = IntegrityCheckOptions()

@@ -231,7 +235,12 @@ def integrity_check_json_db(jsonFile, options=None):
         imagePathsInJson.add(image['file_name'])
 
         assert isinstance(image['file_name'],str), 'Illegal image filename type'
-
+
+        if options.allowIntIDs:
+            assert isinstance(image['id'],str) or isinstance(image['id'],int), \
+                'Illegal image ID type'
+        else:
+            assert isinstance(image['id'],str), 'Illegal image ID type'
 
         imageId = image['id']
 

@@ -329,9 +338,16 @@ def integrity_check_json_db(jsonFile, options=None):
         assert 'id' in ann
         assert 'category_id' in ann
 
-
+        if options.allowIntIDs:
+            assert isinstance(ann['id'],str) or isinstance(ann['id'],int), \
+                'Illegal annotation ID type'
+            assert isinstance(ann['image_id'],str) or isinstance(ann['image_id'],int), \
+                'Illegal annotation image ID type'
+        else:
+            assert isinstance(ann['id'],str), 'Illegal annotation ID type'
+            assert isinstance(ann['image_id'],str), 'Illegal annotation image ID type'
+
         assert isinstance(ann['category_id'],int), 'Illegal annotation category ID type'
-        assert isinstance(ann['image_id'],str), 'Illegal annotation image ID type'
 
         if 'bbox' in ann:
             nBoxes += 1
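
A sketch of the new allowIntIDs option in use; the .json path and image folder are placeholders:

    from megadetector.data_management.databases.integrity_check_json_db import (
        IntegrityCheckOptions, integrity_check_json_db)

    options = IntegrityCheckOptions()
    options.baseDir = '/data/images'     # placeholder image folder
    options.bCheckImageExistence = True
    options.allowIntIDs = True           # accept COCO-style int IDs as well as CCT-style strings

    integrity_check_json_db('my_coco_dataset.json', options)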

megadetector/data_management/importers/cacophony-thermal-importer.py

@@ -315,7 +315,7 @@ def process_file(fn_relative,verbose=False):
     bottom = float(position[3])
     frame_number = int(position[4])
 
-    #
+    # I'm being lazy about the fact that these don't reflect the
     # pixels cropped out of the border. IMO this is OK because for this dataset,
     # this is just an approximate set of coordinates used to disambiguate simultaneous
     # areas of movement when multiple different labels are present in the same video.

@@ -488,7 +488,6 @@ def process_file(fn_relative,verbose=False):
         if tag['confidence'] >= confidence_threshold:
             valid_tags.append(tag)
         else:
-            # TODO
             print('Zero-confidence tag in {}'.format(fn_relative))
 
     track_info['tags'] = valid_tags

@@ -497,13 +496,11 @@ def process_file(fn_relative,verbose=False):
     if len(valid_tags) > 0:
         valid_tracks.append(track_info)
     else:
-        # TODO
         print('Invalid track in {}'.format(fn_relative))
 
     # ...for each track
 
     if (len(clip_metadata['tracks']) > 0) and (len(valid_tracks) == 0):
-        # TODO
         print('Removed all tracks from {}'.format(fn_relative))
 
     clip_metadata['tracks'] = valid_tracks

megadetector/data_management/read_exif.py

@@ -26,6 +26,7 @@ from PIL import Image, ExifTags
 
 from megadetector.utils.path_utils import find_images, is_executable
 from megadetector.utils.ct_utils import args_to_object
+from megadetector.utils.ct_utils import image_file_to_camera_folder
 
 debug_max_images = None
 

@@ -37,51 +38,74 @@ class ReadExifOptions:
     Parameters controlling metadata extraction.
     """
 
-
-    verbose = False
-
-    #: If this is True and an output file is specified for read_exif_from_folder,
-    #: and we encounter a serialization issue, we'll return the results but won't
-    #: error.
-    allow_write_error = False
-
-    #: Number of concurrent workers, set to <= 1 to disable parallelization
-    n_workers = 1
-
-    #: Should we use threads (vs. processes) for parallelization?
-    #:
-    #: Not relevant if n_workers is <= 1.
-    use_threads = True
+    def __init__(self):
 
-
-
-
-
-
-
-
-
-
-
-
+        #: Enable additional debug console output
+        self.verbose = False
+
+        #: If this is True and an output file is specified for read_exif_from_folder,
+        #: and we encounter a serialization issue, we'll return the results but won't
+        #: error.
+        self.allow_write_error = False
+
+        #: Number of concurrent workers, set to <= 1 to disable parallelization
+        self.n_workers = 1
+
+        #: Should we use threads (vs. processes) for parallelization?
+        #:
+        #: Not relevant if n_workers is <= 1.
+        self.use_threads = True
+
+        #: "File" and "ExifTool" are tag types used by ExifTool to report data that
+        #: doesn't come from EXIF, rather from the file (e.g. file size).
+        self.tag_types_to_ignore = set(['File','ExifTool'])
+
+        #: Include/exclude specific tags (tags_to_include and tags_to_exclude are mutually incompatible)
+        #:
+        #: A useful set of tags one might want to limit queries for:
+        #:
+        #: options.tags_to_include = ['DateTime','Model','Make','ExifImageWidth','ExifImageHeight','DateTime',
+        #:     'DateTimeOriginal','Orientation']
+        self.tags_to_include = None
+
+        #: Include/exclude specific tags (tags_to_include and tags_to_exclude are mutually incompatible)
+        self.tags_to_exclude = None
+
+        #: The command line to invoke if using exiftool, can be an absolute path to exiftool.exe, or
+        #: can be just "exiftool", in which case it should be on your system path.
+        self.exiftool_command_name = 'exiftool'
+
+        #: How should we handle byte-formatted EXIF tags?
+        #:
+        #: 'convert_to_string': convert to a Python string
+        #: 'delete': don't include at all
+        #: 'raw': include as a byte string
+        self.byte_handling = 'convert_to_string' # 'convert_to_string','delete','raw'
+
+        #: Should we use exiftool or PIL?
+        self.processing_library = 'pil' # 'exiftool','pil'
+
+
+class ExifResultsToCCTOptions:
+    """
+    Options controlling the behavior of exif_results_to_cct() (which reformats the datetime information
+    extracted by read_exif_from_folder()).
+    """
 
-
-
+    def __init__(self):
+
+        #: Timestamps older than this are assumed to be junk; lots of cameras use a
+        #: default time in 2000.
+        self.min_valid_timestamp_year = 2001
 
-
-
-    exiftool_command_name = 'exiftool'
+        #: The EXIF tag from which to pull datetime information
+        self.exif_datetime_tag = 'DateTimeOriginal'
 
-
-
-
-
-    #: 'raw': include as a byte string
-    byte_handling = 'convert_to_string' # 'convert_to_string','delete','raw'
+        #: Function for extracting location information, should take a string
+        #: and return a string. Defaults to ct_utils.image_file_to_camera_folder. If
+        #: this is None, uses folder names as locations.
+        self.filename_to_location_function = image_file_to_camera_folder
 
-    #: Should we use exiftool or PIL?
-    processing_library = 'pil' # 'exiftool','pil'
-
 
 #%% Functions
 
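
A configuration sketch for the reworked options class; values are illustrative, not defaults recommended by the package:

    from megadetector.data_management.read_exif import ReadExifOptions

    options = ReadExifOptions()
    options.processing_library = 'exiftool'     # or 'pil' (the default)
    options.exiftool_command_name = 'exiftool'  # assumes exiftool is on the system path
    options.n_workers = 8                       # parallelize across 8 workers
    options.use_threads = True                  # threads rather than processes
    options.tags_to_include = ['DateTimeOriginal', 'Make', 'Model', 'Orientation']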

@@ -437,7 +461,7 @@ def _create_image_objects(image_files,recursive=True):
 def _populate_exif_for_images(image_base,images,options=None):
     """
     Main worker loop: read EXIF data for each image object in [images] and
-    populate the image objects.
+    populate the image objects in place.
 
     'images' should be a list of dicts with the field 'file_name' containing
     a relative path (relative to 'image_base').

@@ -544,6 +568,8 @@ def _write_exif_results(results,output_file):
 
     print('Wrote results to {}'.format(output_file))
 
+# ..._write_exif_results(...)
+
 
 def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=None,recursive=True):
     """

@@ -559,10 +585,12 @@ def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=None,recursive=True):
             a list of absolute filenames (if [input_folder] is None)
         recursive (bool, optional): whether to recurse into [input_folder], not relevant if [input_folder]
             is None.
+        verbose (bool, optional): enable additional debug output
 
     Returns:
-
-
+        list: a list of dicts, each of which contains EXIF information for one image. Fields include at least:
+
+            * 'file_name': the relative path to the image
+            * 'exif_tags': a dict of EXIF tags whose exact format depends on [options.processing_library].
     """
 
     if options is None:
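
A minimal call matching the documented return format; the folder and output paths are placeholders:

    from megadetector.data_management.read_exif import read_exif_from_folder

    results = read_exif_from_folder('/data/images', output_file='exif_results.json')

    for r in results[:5]:
        print(r['file_name'], r['exif_tags'])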

@@ -618,6 +646,106 @@ def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=None,recursive=True):
 
     return results
 
+# ...read_exif_from_folder(...)
+
+
+def exif_results_to_cct(exif_results,cct_output_file=None,options=None):
+    """
+    Given the EXIF results for a folder of images read via read_exif_from_folder,
+    create a COCO Camera Traps .json file that has no annotations, but
+    attaches image filenames to locations and datetimes.
+
+    Args:
+        exif_results (str or list): the filename (or loaded list) containing the results
+            from read_exif_from_folder
+        cct_output_file (str, optional): the filename to which we should write
+            COCO-Camera-Traps-formatted data
+
+    Returns:
+        dict: a COCO Camera Traps dict (with no annotations).
+    """
+
+    if options is None:
+        options = ExifResultsToCCTOptions()
+
+    if isinstance(exif_results,str):
+        print('Reading EXIF results from {}'.format(exif_results))
+        with open(exif_results,'r') as f:
+            exif_results = json.load(f)
+    else:
+        assert isinstance(exif_results,list)
+
+    now = datetime.now()
+
+    image_info = []
+
+    images_without_datetime = []
+    images_with_invalid_datetime = []
+
+    # exif_result = exif_results[0]
+    for exif_result in tqdm(exif_results):
+
+        im = {}
+
+        # By default we assume that each leaf-node folder is a location
+        if options.filename_to_location_function is None:
+            im['location'] = os.path.dirname(exif_result['file_name'])
+        else:
+            im['location'] = options.filename_to_location_function(exif_result['file_name'])
+
+        im['file_name'] = exif_result['file_name']
+        im['id'] = im['file_name']
+
+        if ('exif_tags' not in exif_result) or (exif_result['exif_tags'] is None) or \
+           (options.exif_datetime_tag not in exif_result['exif_tags']):
+            exif_dt = None
+        else:
+            exif_dt = exif_result['exif_tags'][options.exif_datetime_tag]
+            exif_dt = parse_exif_datetime_string(exif_dt)
+        if exif_dt is None:
+            im['datetime'] = None
+            images_without_datetime.append(im['file_name'])
+        else:
+            dt = exif_dt
+
+            # An image from the future (or within the last 24 hours) is invalid
+            if (now - dt).total_seconds() <= 1*24*60*60:
+                print('Warning: datetime for {} is {}'.format(
+                    im['file_name'],dt))
+                im['datetime'] = None
+                images_with_invalid_datetime.append(im['file_name'])
+
+            # An image from before the dawn of time is also invalid
+            elif dt.year < options.min_valid_timestamp_year:
+                print('Warning: datetime for {} is {}'.format(
+                    im['file_name'],dt))
+                im['datetime'] = None
+                images_with_invalid_datetime.append(im['file_name'])
+
+            else:
+                im['datetime'] = dt
+
+        image_info.append(im)
+
+    # ...for each exif image result
+
+    print('Parsed EXIF datetime information, unable to parse EXIF date from {} of {} images'.format(
+        len(images_without_datetime),len(exif_results)))
+
+    d = {}
+    d['info'] = {}
+    d['images'] = image_info
+    d['annotations'] = []
+    d['categories'] = []
+
+    if cct_output_file is not None:
+        with open(cct_output_file,'w') as f:
+            json.dump(d,f,indent=1)
+
+    return d
+
+# ...exif_results_to_cct(...)
+
 
 #%% Interactive driver
 
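
An end-to-end sketch chaining the new exif_results_to_cct() onto read_exif_from_folder(); paths are placeholders:

    from megadetector.data_management.read_exif import (
        ExifResultsToCCTOptions, exif_results_to_cct, read_exif_from_folder)

    exif_results = read_exif_from_folder('/data/images', output_file='exif_results.json')

    cct_options = ExifResultsToCCTOptions()
    cct_options.min_valid_timestamp_year = 2001       # the default; earlier timestamps are treated as junk
    cct_options.filename_to_location_function = None  # use leaf folder names as locations

    # Accepts either the in-memory list or the saved 'exif_results.json' filename
    cct = exif_results_to_cct(exif_results, cct_output_file='dataset_cct.json', options=cct_options)
    print('{} images in CCT dict'.format(len(cct['images'])))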