megadetector-5.0.12-py3-none-any.whl → megadetector-5.0.14-py3-none-any.whl

This diff represents the changes between two publicly released versions of this package, as they appear in their public registry. It is provided for informational purposes only.


Files changed (45)
  1. megadetector/api/batch_processing/api_core/server.py +1 -1
  2. megadetector/api/batch_processing/api_core/server_api_config.py +0 -1
  3. megadetector/api/batch_processing/api_core/server_job_status_table.py +0 -3
  4. megadetector/api/batch_processing/api_core/server_utils.py +0 -4
  5. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
  6. megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -3
  7. megadetector/classification/efficientnet/utils.py +0 -3
  8. megadetector/data_management/camtrap_dp_to_coco.py +0 -2
  9. megadetector/data_management/cct_json_utils.py +15 -6
  10. megadetector/data_management/coco_to_labelme.py +12 -1
  11. megadetector/data_management/databases/integrity_check_json_db.py +43 -27
  12. megadetector/data_management/importers/cacophony-thermal-importer.py +1 -4
  13. megadetector/data_management/ocr_tools.py +0 -4
  14. megadetector/data_management/read_exif.py +178 -44
  15. megadetector/data_management/rename_images.py +187 -0
  16. megadetector/data_management/wi_download_csv_to_coco.py +3 -2
  17. megadetector/data_management/yolo_output_to_md_output.py +7 -2
  18. megadetector/detection/process_video.py +548 -244
  19. megadetector/detection/pytorch_detector.py +33 -14
  20. megadetector/detection/run_detector.py +17 -5
  21. megadetector/detection/run_detector_batch.py +179 -65
  22. megadetector/detection/run_inference_with_yolov5_val.py +527 -357
  23. megadetector/detection/tf_detector.py +14 -3
  24. megadetector/detection/video_utils.py +284 -61
  25. megadetector/postprocessing/categorize_detections_by_size.py +16 -14
  26. megadetector/postprocessing/classification_postprocessing.py +716 -0
  27. megadetector/postprocessing/compare_batch_results.py +101 -93
  28. megadetector/postprocessing/convert_output_format.py +12 -5
  29. megadetector/postprocessing/merge_detections.py +18 -7
  30. megadetector/postprocessing/postprocess_batch_results.py +133 -127
  31. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +236 -232
  32. megadetector/postprocessing/subset_json_detector_output.py +66 -62
  33. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +0 -2
  34. megadetector/utils/ct_utils.py +5 -4
  35. megadetector/utils/md_tests.py +380 -128
  36. megadetector/utils/path_utils.py +39 -6
  37. megadetector/utils/process_utils.py +13 -4
  38. megadetector/visualization/visualization_utils.py +7 -2
  39. megadetector/visualization/visualize_db.py +79 -77
  40. megadetector/visualization/visualize_detector_output.py +0 -1
  41. {megadetector-5.0.12.dist-info → megadetector-5.0.14.dist-info}/LICENSE +0 -0
  42. {megadetector-5.0.12.dist-info → megadetector-5.0.14.dist-info}/METADATA +2 -2
  43. {megadetector-5.0.12.dist-info → megadetector-5.0.14.dist-info}/RECORD +45 -43
  44. {megadetector-5.0.12.dist-info → megadetector-5.0.14.dist-info}/top_level.txt +0 -0
  45. {megadetector-5.0.12.dist-info → megadetector-5.0.14.dist-info}/WHEEL +0 -0
@@ -105,7 +105,7 @@ def request_detections():
     model_version = post_body.get('model_version', '')
     if model_version != '':
         model_version = str(model_version)  # in case user used an int
-        if model_version not in api_config.MD_VERSIONS_TO_REL_PATH: # TODO use AppConfig to store model version info
+        if model_version not in api_config.MD_VERSIONS_TO_REL_PATH:
             return make_error(400, f'model_version {model_version} is not supported.')
 
     # check request_name has only allowed characters
@@ -47,7 +47,6 @@ MAX_BATCH_ACCOUNT_ACTIVE_JOBS = 300
 DETECTION_CONF_THRESHOLD = 0.1
 
 # relative to the `megadetector_copies` folder in the container `models`
-# TODO add MD versions info to AppConfig
 MD_VERSIONS_TO_REL_PATH = {
     '4.1': 'megadetector_v4_1/md_v4.1.0.pb',
     '3': 'megadetector_v3/megadetector_v3_tf19.pb',
@@ -67,9 +67,6 @@ class JobStatusTable:
         assert 'request_status' in status and 'message' in status
         assert status['request_status'] in JobStatusTable.allowed_statuses
 
-        # TODO do not read the entry first to get the call_params when the Cosmos SDK add a
-        # patching functionality:
-        # https://feedback.azure.com/forums/263030-azure-cosmos-db/suggestions/6693091-be-able-to-do-partial-updates-on-document
         item_old = self.read_job_status(job_id)
         if item_old is None:
             raise ValueError
@@ -19,7 +19,6 @@ log = logging.getLogger(os.environ['FLASK_APP'])
 #%% helper classes and functions
 
 def make_error(error_code: int, error_message: str) -> Tuple[dict, int]:
-    # TODO log exception when we have more telemetry
     log.error(f'Error {error_code} - {error_message}')
     return {'error': error_message}, error_code
 
@@ -28,8 +27,6 @@ def check_data_container_sas(input_container_sas: str) -> Optional[Tuple[int, st
     """
     Returns a tuple (error_code, msg) if not a usable SAS URL, else returns None
     """
-    # TODO check that the expiry date of input_container_sas is at least a month
-    # into the future
     permissions = sas_blob_utils.get_permissions_from_uri(input_container_sas)
     data = sas_blob_utils.get_all_query_parts(input_container_sas)
 
@@ -38,7 +35,6 @@ def check_data_container_sas(input_container_sas: str) -> Optional[Tuple[int, st
     if 'read' not in permissions or 'list' not in permissions:
         if 'si' in data:
             # if no permission specified explicitly but has an access policy, assumes okay
-            # TODO - check based on access policy as well
             return None
 
     return 400, msg
@@ -72,7 +72,6 @@ def main():
 
     print(deployment_id)
 
-    # TODO: check project ID ?
     sql = ''' SELECT emammal_project_taxa_id FROM wild_id.emammal_project_taxa
               where species in ("No Animal", "Unknown Animal", "Homo sapiens", "Vehicle") '''
 
@@ -147,8 +147,6 @@ def detect_sync():
 
     try:
         # Write images to temporary files
-        #
-        # TODO: read from memory rather than using intermediate files
         os.makedirs(temp_direc,exist_ok=True)
         for name, file in request.files.items():
             if file.content_type in config.IMAGE_CONTENT_TYPES:
@@ -166,7 +164,6 @@ def detect_sync():
 
         while True:
 
-            # TODO: convert to a blocking read and eliminate the sleep() statement in this loop
             result = db.get(redis_id)
 
             if result:
@@ -90,9 +90,6 @@ def round_filters(filters, global_params):
     multiplier = global_params.width_coefficient
     if not multiplier:
         return filters
-    # TODO: modify the params names.
-    # maybe the names (width_divisor,min_width)
-    # are more suitable than (depth_divisor,min_depth).
    divisor = global_params.depth_divisor
    min_depth = global_params.min_depth
    filters *= multiplier
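
For context on the function edited above: round_filters() scales a layer's channel count by the compound-scaling width multiplier, then rounds to a hardware-friendly multiple of depth_divisor. A sketch of the standard EfficientNet rounding logic that follows the context lines shown (the released file may differ in detail):

def round_filters_sketch(filters, width_coefficient, depth_divisor=8, min_depth=None):
    # Scale the channel count by the width multiplier
    if not width_coefficient:
        return filters
    filters *= width_coefficient
    min_depth = min_depth or depth_divisor
    # Round to the nearest multiple of depth_divisor...
    new_filters = max(min_depth, int(filters + depth_divisor / 2) // depth_divisor * depth_divisor)
    # ...but never round down by more than 10%
    if new_filters < 0.9 * filters:
        new_filters += depth_divisor
    return int(new_filters)
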
@@ -235,5 +235,3 @@ if False:
     #%% Command-line driver
 
     # TODO
-
-
@@ -295,8 +295,9 @@ class SequenceOptions:
     Options parameterizing the grouping of images into sequences by time.
     """
 
-    #: Images separated by <= this duration will be grouped into the same sequence.
-    episode_interval_seconds = 60.0
+    def __init__(self):
+        #: Images separated by <= this duration will be grouped into the same sequence.
+        self.episode_interval_seconds = 60.0
 
 
 #%% Functions
@@ -305,16 +306,24 @@ def create_sequences(image_info,options=None):
     """
     Synthesizes episodes/sequences/bursts for the images in [image_info].
 
-    Modifies [image_info], populating the 'seq_id', 'seq_num_frames', and 'frame_num' fields
-    for each image.
+    Modifies [image_info] in place, populating the 'seq_id', 'seq_num_frames', and 'frame_num'
+    fields for each image.
 
     Args:
-        image_info (dict): a list of dicts in CCT format, i.e. with fields 'file_name' (str),
-            'datetime' (datetime), and 'location' (str).
+        image_info (str, dict, or list): a dict in CCT format, a CCT .json file, or just the 'images' component
+            of a CCT dataset (a list of dicts with fields 'file_name' (str), 'datetime' (datetime), and
+            'location' (str)).
     """
 
     if options is None:
         options = SequenceOptions()
+
+    if isinstance(image_info,str):
+        with open(image_info,'r') as f:
+            image_info = json.load(f)
+
+    if isinstance(image_info,dict):
+        image_info = image_info['images']
 
     # Find all unique locations
     locations = set()
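
With this change, create_sequences() accepts a CCT .json filename, a full CCT dict, or a bare image list. A minimal usage sketch (the file path is hypothetical; grouping still happens in place on the image dicts):

import json
from megadetector.data_management.cct_json_utils import create_sequences, SequenceOptions

# 'my_dataset.json' is a hypothetical CCT file
with open('my_dataset.json','r') as f:
    cct_data = json.load(f)

options = SequenceOptions()
options.episode_interval_seconds = 120.0  # group images up to two minutes apart

# Adds 'seq_id', 'seq_num_frames', and 'frame_num' to each dict in cct_data['images']
create_sequences(cct_data, options=options)
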
@@ -95,7 +95,18 @@ def get_labelme_dict_for_image_from_coco_record(im,annotations,categories,info=N
 def coco_to_labelme(coco_data,image_base,overwrite=False,bypass_image_size_check=False,verbose=False):
     """
     For all the images in [coco_data] (a dict or a filename), write a .json file in
-    labelme format alongside the corresponding relative path within image_base.
+    labelme format alongside the corresponding relative path within image_base.
+
+    Args:
+        coco_data (dict or str): path to a COCO-formatted .json file, or an already-loaded
+            COCO-formatted dict
+        image_base (str): path where images live (filenames in [coco_data] should be relative to
+            [image_base]); this is also where labelme files will be written
+        overwrite (bool, optional): overwrite existing .json files
+        bypass_image_size_check (bool, optional): if you're sure that the COCO data already has
+            correct 'width' and 'height' fields, this bypasses the somewhat-slow loading of
+            each image to fetch image sizes
+        verbose (bool, optional): enable additional debug output
     """
 
     # Load COCO data if necessary
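
A minimal usage sketch for the newly documented parameters (paths are hypothetical):

from megadetector.data_management.coco_to_labelme import coco_to_labelme

# Writes one labelme-format .json next to each image under image_base; set
# bypass_image_size_check=True only if the 'width'/'height' fields in the
# COCO data are known to be correct.
coco_to_labelme('coco.json', image_base='/data/images', overwrite=False)
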
@@ -37,30 +37,34 @@ class IntegrityCheckOptions:
     Options for integrity_check_json_db()
     """
 
-    #: Image path; the filenames in the .json file should be relative to this folder
-    baseDir = ''
-
-    #: Should we validate the image sizes?
-    bCheckImageSizes = False
-
-    #: Should we check that all the images in the .json file exist on disk?
-    bCheckImageExistence = False
-
-    #: Should we search [baseDir] for images that are not used in the .json file?
-    bFindUnusedImages = False
-
-    #: Should we require that all images in the .json file have a 'location' field?
-    bRequireLocation = True
-
-    #: For debugging, limit the number of images we'll process
-    iMaxNumImages = -1
-
-    #: Number of threads to use for parallelization, set to <= 1 to disable parallelization
-    nThreads = 10
-
-    #: Enable additional debug output
-    verbose = True
-
+    def __init__(self):
+
+        #: Image path; the filenames in the .json file should be relative to this folder
+        self.baseDir = ''
+
+        #: Should we validate the image sizes?
+        self.bCheckImageSizes = False
+
+        #: Should we check that all the images in the .json file exist on disk?
+        self.bCheckImageExistence = False
+
+        #: Should we search [baseDir] for images that are not used in the .json file?
+        self.bFindUnusedImages = False
+
+        #: Should we require that all images in the .json file have a 'location' field?
+        self.bRequireLocation = True
+
+        #: For debugging, limit the number of images we'll process
+        self.iMaxNumImages = -1
+
+        #: Number of threads to use for parallelization, set to <= 1 to disable parallelization
+        self.nThreads = 10
+
+        #: Enable additional debug output
+        self.verbose = True
+
+        #: Allow integer-valued image and annotation IDs (COCO uses this, CCT files use strings)
+        self.allowIntIDs = False
 
 # This is used in a medium-hacky way to share modified options across threads
 defaultOptions = IntegrityCheckOptions()
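
This release moves option defaults from class attributes into __init__ here and in SequenceOptions and ReadExifOptions. A minimal sketch of the aliasing pitfall that instance attributes avoid (hypothetical classes for illustration):

class ClassAttrOptions:
    tags = set()           # one set object shared by every instance

class InstanceAttrOptions:
    def __init__(self):
        self.tags = set()  # a fresh set per instance

a, b = ClassAttrOptions(), ClassAttrOptions()
a.tags.add('x')
print(b.tags)   # {'x'}: mutation leaks across instances

c, d = InstanceAttrOptions(), InstanceAttrOptions()
c.tags.add('x')
print(d.tags)   # set(): each instance owns its own defaults
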
@@ -231,7 +235,12 @@ def integrity_check_json_db(jsonFile, options=None):
         imagePathsInJson.add(image['file_name'])
 
         assert isinstance(image['file_name'],str), 'Illegal image filename type'
-        assert isinstance(image['id'],str), 'Illegal image ID type'
+
+        if options.allowIntIDs:
+            assert isinstance(image['id'],str) or isinstance(image['id'],int), \
+                'Illegal image ID type'
+        else:
+            assert isinstance(image['id'],str), 'Illegal image ID type'
 
         imageId = image['id']
 
@@ -329,9 +338,16 @@ def integrity_check_json_db(jsonFile, options=None):
         assert 'id' in ann
         assert 'category_id' in ann
 
-        assert isinstance(ann['id'],str), 'Illegal annotation ID type'
+        if options.allowIntIDs:
+            assert isinstance(ann['id'],str) or isinstance(ann['id'],int), \
+                'Illegal annotation ID type'
+            assert isinstance(ann['image_id'],str) or isinstance(ann['image_id'],int), \
+                'Illegal annotation image ID type'
+        else:
+            assert isinstance(ann['id'],str), 'Illegal annotation ID type'
+            assert isinstance(ann['image_id'],str), 'Illegal annotation image ID type'
+
         assert isinstance(ann['category_id'],int), 'Illegal annotation category ID type'
-        assert isinstance(ann['image_id'],str), 'Illegal annotation image ID type'
 
         if 'bbox' in ann:
             nBoxes += 1
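
A usage sketch for the new allowIntIDs option (paths are hypothetical):

from megadetector.data_management.databases.integrity_check_json_db import \
    integrity_check_json_db, IntegrityCheckOptions

options = IntegrityCheckOptions()
options.baseDir = '/data/images'    # hypothetical image folder
options.bCheckImageExistence = True
options.allowIntIDs = True          # accept COCO-style integer image/annotation IDs

# 'dataset.json' is a hypothetical COCO/CCT file
integrity_check_json_db('dataset.json', options=options)
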
@@ -315,7 +315,7 @@ def process_file(fn_relative,verbose=False):
         bottom = float(position[3])
         frame_number = int(position[4])
 
-        # TODO: I'm being lazy about the fact that these don't reflect the
+        # I'm being lazy about the fact that these don't reflect the
         # pixels cropped out of the border. IMO this is OK because for this dataset,
         # this is just an approximate set of coordinates used to disambiguate simultaneous
         # areas of movement when multiple different labels are present in the same video.
@@ -488,7 +488,6 @@ def process_file(fn_relative,verbose=False):
             if tag['confidence'] >= confidence_threshold:
                 valid_tags.append(tag)
             else:
-                # TODO
                 print('Zero-confidence tag in {}'.format(fn_relative))
 
         track_info['tags'] = valid_tags
@@ -497,13 +496,11 @@ def process_file(fn_relative,verbose=False):
         if len(valid_tags) > 0:
             valid_tracks.append(track_info)
         else:
-            # TODO
             print('Invalid track in {}'.format(fn_relative))
 
     # ...for each track
 
     if (len(clip_metadata['tracks']) > 0) and (len(valid_tracks) == 0):
-        # TODO
         print('Removed all tracks from {}'.format(fn_relative))
 
     clip_metadata['tracks'] = valid_tracks
@@ -868,7 +868,3 @@ if False:
     if extracted_datetime is not None:
         assert extracted_datetime.year <= 2023 and extracted_datetime.year >= 1990
 
-
-    #%% Command-line driver
-
-    # TODO
@@ -16,7 +16,7 @@ path. No attempt is made to be consistent in format across the two approaches.
 import os
 import subprocess
 import json
-from datetime import datetime
+from datetime import date, datetime
 
 from multiprocessing.pool import ThreadPool as ThreadPool
 from multiprocessing.pool import Pool as Pool
@@ -26,6 +26,7 @@ from PIL import Image, ExifTags
 
 from megadetector.utils.path_utils import find_images, is_executable
 from megadetector.utils.ct_utils import args_to_object
+from megadetector.utils.ct_utils import image_file_to_camera_folder
 
 debug_max_images = None
 
@@ -37,51 +38,74 @@ class ReadExifOptions:
     Parameters controlling metadata extraction.
     """
 
-    #: Enable additional debug console output
-    verbose = False
-
-    #: If this is True and an output file is specified for read_exif_from_folder,
-    #: and we encounter a serialization issue, we'll return the results but won't
-    #: error.
-    allow_write_error = False
-
-    #: Number of concurrent workers, set to <= 1 to disable parallelization
-    n_workers = 1
-
-    #: Should we use threads (vs. processes) for parallelization?
-    #:
-    #: Not relevant if n_workers is <= 1.
-    use_threads = True
+    def __init__(self):
 
-    #: "File" and "ExifTool" are tag types used by ExifTool to report data that
-    #: doesn't come from EXIF, rather from the file (e.g. file size).
-    tag_types_to_ignore = set(['File','ExifTool'])
-
-    #: Include/exclude specific tags (tags_to_include and tags_to_exclude are mutually incompatible)
-    #:
-    #: A useful set of tags one might want to limit queries for:
-    #:
-    #: options.tags_to_include = ['DateTime','Model','Make','ExifImageWidth','ExifImageHeight','DateTime',
-    #:    'DateTimeOriginal','Orientation']
-    tags_to_include = None
+        #: Enable additional debug console output
+        self.verbose = False
+
+        #: If this is True and an output file is specified for read_exif_from_folder,
+        #: and we encounter a serialization issue, we'll return the results but won't
+        #: error.
+        self.allow_write_error = False
+
+        #: Number of concurrent workers, set to <= 1 to disable parallelization
+        self.n_workers = 1
+
+        #: Should we use threads (vs. processes) for parallelization?
+        #:
+        #: Not relevant if n_workers is <= 1.
+        self.use_threads = True
+
+        #: "File" and "ExifTool" are tag types used by ExifTool to report data that
+        #: doesn't come from EXIF, rather from the file (e.g. file size).
+        self.tag_types_to_ignore = set(['File','ExifTool'])
+
+        #: Include/exclude specific tags (tags_to_include and tags_to_exclude are mutually incompatible)
+        #:
+        #: A useful set of tags one might want to limit queries for:
+        #:
+        #: options.tags_to_include = ['DateTime','Model','Make','ExifImageWidth','ExifImageHeight',
+        #:    'DateTimeOriginal','Orientation']
+        self.tags_to_include = None
+
+        #: Include/exclude specific tags (tags_to_include and tags_to_exclude are mutually incompatible)
+        self.tags_to_exclude = None
+
+        #: The command line to invoke if using exiftool, can be an absolute path to exiftool.exe, or
+        #: can be just "exiftool", in which case it should be on your system path.
+        self.exiftool_command_name = 'exiftool'
+
+        #: How should we handle byte-formatted EXIF tags?
+        #:
+        #: 'convert_to_string': convert to a Python string
+        #: 'delete': don't include at all
+        #: 'raw': include as a byte string
+        self.byte_handling = 'convert_to_string' # 'convert_to_string','delete','raw'
+
+        #: Should we use exiftool or PIL?
+        self.processing_library = 'pil' # 'exiftool','pil'
+
+
+class ExifResultsToCCTOptions:
+    """
+    Options controlling the behavior of exif_results_to_cct() (which reformats the datetime
+    information extracted by read_exif_from_folder()).
+    """
 
-    #: Include/exclude specific tags (tags_to_include and tags_to_exclude are mutually incompatible)
-    tags_to_exclude = None
+    def __init__(self):
+
+        #: Timestamps older than this are assumed to be junk; lots of cameras use a
+        #: default time in 2000.
+        self.min_valid_timestamp_year = 2001
 
-    #: The command line to invoke if using exiftool, can be an absolute path to exiftool.exe, or
-    #: can be just "exiftool", in which case it should be on your system path.
-    exiftool_command_name = 'exiftool'
+        #: The EXIF tag from which to pull datetime information
+        self.exif_datetime_tag = 'DateTimeOriginal'
 
-    #: How should we handle byte-formatted EXIF tags?
-    #:
-    #: 'convert_to_string': convert to a Python string
-    #: 'delete': don't include at all
-    #: 'raw': include as a byte string
-    byte_handling = 'convert_to_string' # 'convert_to_string','delete','raw'
+        #: Function for extracting location information, should take a string
+        #: and return a string. Defaults to ct_utils.image_file_to_camera_folder. If
+        #: this is None, location is written as "unknown".
+        self.filename_to_location_function = image_file_to_camera_folder
 
-    #: Should we use exiftool or PIL?
-    processing_library = 'pil' # 'exiftool','pil'
-
 
 #%% Functions
 
@@ -437,7 +461,7 @@ def _create_image_objects(image_files,recursive=True):
 def _populate_exif_for_images(image_base,images,options=None):
     """
     Main worker loop: read EXIF data for each image object in [images] and
-    populate the image objects.
+    populate the image objects in place.
 
     'images' should be a list of dicts with the field 'file_name' containing
     a relative path (relative to 'image_base').
@@ -544,6 +568,8 @@ def _write_exif_results(results,output_file):
 
     print('Wrote results to {}'.format(output_file))
 
+# ..._write_exif_results(...)
+
 
 def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=None,recursive=True):
     """
@@ -559,10 +585,12 @@ def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=N
             a list of absolute filenames (if [input_folder] is None)
         recursive (bool, optional): whether to recurse into [input_folder], not relevant if [input_folder]
             is None.
+        verbose (bool, optional): enable additional debug output
 
     Returns:
-        dict: a dictionary mapping relative filenames to EXIF data, whose format depends on whether
-            we're using PIL or exiftool.
+        list: a list of dicts, each of which contains EXIF information for one image. Fields include at least:
+            * 'file_name': the relative path to the image
+            * 'exif_tags': a dict of EXIF tags whose exact format depends on [options.processing_library].
     """
 
     if options is None:
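
A usage sketch combining the refactored ReadExifOptions with read_exif_from_folder() (the folder path, output filename, and tag list are hypothetical):

from megadetector.data_management.read_exif import ReadExifOptions, read_exif_from_folder

options = ReadExifOptions()
options.processing_library = 'pil'  # or 'exiftool', if it's on your PATH
options.n_workers = 8               # parallelize over 8 workers (threads by default)
options.tags_to_include = ['DateTimeOriginal','Make','Model','Orientation']

# Returns a list of dicts with 'file_name' and 'exif_tags' fields
results = read_exif_from_folder('/data/camera_folder', output_file='exif.json',
                                options=options)
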
@@ -618,6 +646,112 @@ def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=N
 
     return results
 
+# ...read_exif_from_folder(...)
+
+
+def exif_results_to_cct(exif_results,cct_output_file=None,options=None):
+    """
+    Given the EXIF results for a folder of images read via read_exif_from_folder,
+    create a COCO Camera Traps .json file that has no annotations, but
+    attaches image filenames to locations and datetimes.
+
+    Args:
+        exif_results (str or list): the filename (or loaded list) containing the results
+            from read_exif_from_folder
+        cct_output_file (str, optional): the filename to which we should write
+            COCO-Camera-Traps-formatted data
+
+    Returns:
+        dict: a COCO Camera Traps dict (with no annotations).
+    """
+
+    if options is None:
+        options = ExifResultsToCCTOptions()
+
+    if isinstance(exif_results,str):
+        print('Reading EXIF results from {}'.format(exif_results))
+        with open(exif_results,'r') as f:
+            exif_results = json.load(f)
+    else:
+        assert isinstance(exif_results,list)
+
+    now = datetime.now()
+
+    image_info = []
+
+    images_without_datetime = []
+    images_with_invalid_datetime = []
+
+    # exif_result = exif_results[0]
+    for exif_result in tqdm(exif_results):
+
+        im = {}
+
+        # By default we assume that each leaf-node folder is a location
+        if options.filename_to_location_function is None:
+            im['location'] = 'unknown'
+        else:
+            im['location'] = options.filename_to_location_function(exif_result['file_name'])
+
+        im['file_name'] = exif_result['file_name']
+        im['id'] = im['file_name']
+
+        if ('exif_tags' not in exif_result) or (exif_result['exif_tags'] is None) or \
+           (options.exif_datetime_tag not in exif_result['exif_tags']):
+            exif_dt = None
+        else:
+            exif_dt = exif_result['exif_tags'][options.exif_datetime_tag]
+            exif_dt = parse_exif_datetime_string(exif_dt)
+        if exif_dt is None:
+            im['datetime'] = None
+            images_without_datetime.append(im['file_name'])
+        else:
+            dt = exif_dt
+
+            # An image from the future (or within the last 24 hours) is invalid
+            if (now - dt).total_seconds() <= 1*24*60*60:
+                print('Warning: datetime for {} is {}'.format(
+                    im['file_name'],dt))
+                im['datetime'] = None
+                images_with_invalid_datetime.append(im['file_name'])
+
+            # An image from before the dawn of time is also invalid
+            elif dt.year < options.min_valid_timestamp_year:
+                print('Warning: datetime for {} is {}'.format(
+                    im['file_name'],dt))
+                im['datetime'] = None
+                images_with_invalid_datetime.append(im['file_name'])
+
+            else:
+                im['datetime'] = dt
+
+        image_info.append(im)
+
+    # ...for each exif image result
+
+    print('Parsed EXIF datetime information, unable to parse EXIF date from {} of {} images'.format(
+        len(images_without_datetime),len(exif_results)))
+
+    d = {}
+    d['info'] = {}
+    d['images'] = image_info
+    d['annotations'] = []
+    d['categories'] = []
+
+    def json_serialize_datetime(obj):
+        if isinstance(obj, (datetime, date)):
+            return obj.isoformat()
+        raise TypeError('Object {} (type {}) not serializable'.format(
+            str(obj),type(obj)))
+
+    if cct_output_file is not None:
+        with open(cct_output_file,'w') as f:
+            json.dump(d,f,indent=1,default=json_serialize_datetime)
+
+    return d
+
+# ...exif_results_to_cct(...)
+
 
 #%% Interactive driver
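
Finally, a usage sketch for the new exif_results_to_cct() (paths are hypothetical; 'exif.json' would be output from read_exif_from_folder):

from megadetector.data_management.read_exif import \
    exif_results_to_cct, ExifResultsToCCTOptions

cct_options = ExifResultsToCCTOptions()
cct_options.min_valid_timestamp_year = 2005  # treat older timestamps as junk

# Writes an annotation-free CCT file mapping each image to a location and datetime
cct = exif_results_to_cct('exif.json', cct_output_file='cct.json', options=cct_options)
print('Wrote CCT records for {} images'.format(len(cct['images'])))
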