megadetector 10.0.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147) hide show
  1. megadetector/__init__.py +0 -0
  2. megadetector/api/__init__.py +0 -0
  3. megadetector/api/batch_processing/integration/digiKam/setup.py +6 -0
  4. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +465 -0
  5. megadetector/api/batch_processing/integration/eMammal/test_scripts/config_template.py +5 -0
  6. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +125 -0
  7. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +55 -0
  8. megadetector/classification/__init__.py +0 -0
  9. megadetector/classification/aggregate_classifier_probs.py +108 -0
  10. megadetector/classification/analyze_failed_images.py +227 -0
  11. megadetector/classification/cache_batchapi_outputs.py +198 -0
  12. megadetector/classification/create_classification_dataset.py +626 -0
  13. megadetector/classification/crop_detections.py +516 -0
  14. megadetector/classification/csv_to_json.py +226 -0
  15. megadetector/classification/detect_and_crop.py +853 -0
  16. megadetector/classification/efficientnet/__init__.py +9 -0
  17. megadetector/classification/efficientnet/model.py +415 -0
  18. megadetector/classification/efficientnet/utils.py +608 -0
  19. megadetector/classification/evaluate_model.py +520 -0
  20. megadetector/classification/identify_mislabeled_candidates.py +152 -0
  21. megadetector/classification/json_to_azcopy_list.py +63 -0
  22. megadetector/classification/json_validator.py +696 -0
  23. megadetector/classification/map_classification_categories.py +276 -0
  24. megadetector/classification/merge_classification_detection_output.py +509 -0
  25. megadetector/classification/prepare_classification_script.py +194 -0
  26. megadetector/classification/prepare_classification_script_mc.py +228 -0
  27. megadetector/classification/run_classifier.py +287 -0
  28. megadetector/classification/save_mislabeled.py +110 -0
  29. megadetector/classification/train_classifier.py +827 -0
  30. megadetector/classification/train_classifier_tf.py +725 -0
  31. megadetector/classification/train_utils.py +323 -0
  32. megadetector/data_management/__init__.py +0 -0
  33. megadetector/data_management/animl_to_md.py +161 -0
  34. megadetector/data_management/annotations/__init__.py +0 -0
  35. megadetector/data_management/annotations/annotation_constants.py +33 -0
  36. megadetector/data_management/camtrap_dp_to_coco.py +270 -0
  37. megadetector/data_management/cct_json_utils.py +566 -0
  38. megadetector/data_management/cct_to_md.py +184 -0
  39. megadetector/data_management/cct_to_wi.py +293 -0
  40. megadetector/data_management/coco_to_labelme.py +284 -0
  41. megadetector/data_management/coco_to_yolo.py +701 -0
  42. megadetector/data_management/databases/__init__.py +0 -0
  43. megadetector/data_management/databases/add_width_and_height_to_db.py +107 -0
  44. megadetector/data_management/databases/combine_coco_camera_traps_files.py +210 -0
  45. megadetector/data_management/databases/integrity_check_json_db.py +563 -0
  46. megadetector/data_management/databases/subset_json_db.py +195 -0
  47. megadetector/data_management/generate_crops_from_cct.py +200 -0
  48. megadetector/data_management/get_image_sizes.py +164 -0
  49. megadetector/data_management/labelme_to_coco.py +559 -0
  50. megadetector/data_management/labelme_to_yolo.py +349 -0
  51. megadetector/data_management/lila/__init__.py +0 -0
  52. megadetector/data_management/lila/create_lila_blank_set.py +556 -0
  53. megadetector/data_management/lila/create_lila_test_set.py +192 -0
  54. megadetector/data_management/lila/create_links_to_md_results_files.py +106 -0
  55. megadetector/data_management/lila/download_lila_subset.py +182 -0
  56. megadetector/data_management/lila/generate_lila_per_image_labels.py +777 -0
  57. megadetector/data_management/lila/get_lila_annotation_counts.py +174 -0
  58. megadetector/data_management/lila/get_lila_image_counts.py +112 -0
  59. megadetector/data_management/lila/lila_common.py +319 -0
  60. megadetector/data_management/lila/test_lila_metadata_urls.py +164 -0
  61. megadetector/data_management/mewc_to_md.py +344 -0
  62. megadetector/data_management/ocr_tools.py +873 -0
  63. megadetector/data_management/read_exif.py +964 -0
  64. megadetector/data_management/remap_coco_categories.py +195 -0
  65. megadetector/data_management/remove_exif.py +156 -0
  66. megadetector/data_management/rename_images.py +194 -0
  67. megadetector/data_management/resize_coco_dataset.py +665 -0
  68. megadetector/data_management/speciesnet_to_md.py +41 -0
  69. megadetector/data_management/wi_download_csv_to_coco.py +247 -0
  70. megadetector/data_management/yolo_output_to_md_output.py +594 -0
  71. megadetector/data_management/yolo_to_coco.py +984 -0
  72. megadetector/data_management/zamba_to_md.py +188 -0
  73. megadetector/detection/__init__.py +0 -0
  74. megadetector/detection/change_detection.py +840 -0
  75. megadetector/detection/process_video.py +479 -0
  76. megadetector/detection/pytorch_detector.py +1451 -0
  77. megadetector/detection/run_detector.py +1267 -0
  78. megadetector/detection/run_detector_batch.py +2172 -0
  79. megadetector/detection/run_inference_with_yolov5_val.py +1314 -0
  80. megadetector/detection/run_md_and_speciesnet.py +1604 -0
  81. megadetector/detection/run_tiled_inference.py +1044 -0
  82. megadetector/detection/tf_detector.py +209 -0
  83. megadetector/detection/video_utils.py +1379 -0
  84. megadetector/postprocessing/__init__.py +0 -0
  85. megadetector/postprocessing/add_max_conf.py +72 -0
  86. megadetector/postprocessing/categorize_detections_by_size.py +166 -0
  87. megadetector/postprocessing/classification_postprocessing.py +1943 -0
  88. megadetector/postprocessing/combine_batch_outputs.py +249 -0
  89. megadetector/postprocessing/compare_batch_results.py +2110 -0
  90. megadetector/postprocessing/convert_output_format.py +403 -0
  91. megadetector/postprocessing/create_crop_folder.py +629 -0
  92. megadetector/postprocessing/detector_calibration.py +570 -0
  93. megadetector/postprocessing/generate_csv_report.py +522 -0
  94. megadetector/postprocessing/load_api_results.py +223 -0
  95. megadetector/postprocessing/md_to_coco.py +428 -0
  96. megadetector/postprocessing/md_to_labelme.py +351 -0
  97. megadetector/postprocessing/md_to_wi.py +41 -0
  98. megadetector/postprocessing/merge_detections.py +392 -0
  99. megadetector/postprocessing/postprocess_batch_results.py +2140 -0
  100. megadetector/postprocessing/remap_detection_categories.py +226 -0
  101. megadetector/postprocessing/render_detection_confusion_matrix.py +677 -0
  102. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +206 -0
  103. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +82 -0
  104. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +1665 -0
  105. megadetector/postprocessing/separate_detections_into_folders.py +795 -0
  106. megadetector/postprocessing/subset_json_detector_output.py +964 -0
  107. megadetector/postprocessing/top_folders_to_bottom.py +238 -0
  108. megadetector/postprocessing/validate_batch_results.py +332 -0
  109. megadetector/taxonomy_mapping/__init__.py +0 -0
  110. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +491 -0
  111. megadetector/taxonomy_mapping/map_new_lila_datasets.py +211 -0
  112. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +165 -0
  113. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +543 -0
  114. megadetector/taxonomy_mapping/retrieve_sample_image.py +71 -0
  115. megadetector/taxonomy_mapping/simple_image_download.py +231 -0
  116. megadetector/taxonomy_mapping/species_lookup.py +1008 -0
  117. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +159 -0
  118. megadetector/taxonomy_mapping/taxonomy_graph.py +346 -0
  119. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +83 -0
  120. megadetector/tests/__init__.py +0 -0
  121. megadetector/tests/test_nms_synthetic.py +335 -0
  122. megadetector/utils/__init__.py +0 -0
  123. megadetector/utils/ct_utils.py +1857 -0
  124. megadetector/utils/directory_listing.py +199 -0
  125. megadetector/utils/extract_frames_from_video.py +307 -0
  126. megadetector/utils/gpu_test.py +125 -0
  127. megadetector/utils/md_tests.py +2072 -0
  128. megadetector/utils/path_utils.py +2872 -0
  129. megadetector/utils/process_utils.py +172 -0
  130. megadetector/utils/split_locations_into_train_val.py +237 -0
  131. megadetector/utils/string_utils.py +234 -0
  132. megadetector/utils/url_utils.py +825 -0
  133. megadetector/utils/wi_platform_utils.py +968 -0
  134. megadetector/utils/wi_taxonomy_utils.py +1766 -0
  135. megadetector/utils/write_html_image_list.py +239 -0
  136. megadetector/visualization/__init__.py +0 -0
  137. megadetector/visualization/plot_utils.py +309 -0
  138. megadetector/visualization/render_images_with_thumbnails.py +243 -0
  139. megadetector/visualization/visualization_utils.py +1973 -0
  140. megadetector/visualization/visualize_db.py +630 -0
  141. megadetector/visualization/visualize_detector_output.py +498 -0
  142. megadetector/visualization/visualize_video_output.py +705 -0
  143. megadetector-10.0.15.dist-info/METADATA +115 -0
  144. megadetector-10.0.15.dist-info/RECORD +147 -0
  145. megadetector-10.0.15.dist-info/WHEEL +5 -0
  146. megadetector-10.0.15.dist-info/licenses/LICENSE +19 -0
  147. megadetector-10.0.15.dist-info/top_level.txt +1 -0
@@ -0,0 +1,566 @@
1
+ """
2
+
3
+ cct_json_utils.py
4
+
5
+ Utilities for working with COCO Camera Traps .json databases:
6
+
7
+ https://github.com/agentmorris/MegaDetector/blob/main/megadetector/data_management/README.md#coco-cameratraps-format
8
+
9
+ """
10
+
11
+ #%% Constants and imports
12
+
13
+ import json
14
+ import os
15
+ import datetime
16
+ import dateutil
17
+
18
+ from tqdm import tqdm
19
+ from collections import defaultdict, OrderedDict
20
+
21
+
22
+ #%% Classes
23
+
24
class CameraTrapJsonUtils:
    """
    A collection of static helpers for working with COCO Camera Traps (CCT) databases.
    """

    @staticmethod
    def annotations_to_string(annotations, cat_id_to_name):
        """
        Builds a comma-delimited string of the unique class names referenced by
        [annotations], in alphabetical order.

        Args:
            annotations (list): annotation dicts, each with a 'category_id' field
            cat_id_to_name (dict): maps category IDs to category names

        Returns:
            str: the sorted, de-duplicated class names joined by commas
        """

        return ','.join(
            CameraTrapJsonUtils.annotations_to_class_names(annotations, cat_id_to_name))


    @staticmethod
    def annotations_to_class_names(annotations, cat_id_to_name):
        """
        Looks up the class name for every annotation in [annotations] and returns
        the distinct names in alphabetical order.

        Args:
            annotations (list): annotation dicts, each with a 'category_id' field
            cat_id_to_name (dict): maps category IDs to category names

        Returns:
            list: the sorted, de-duplicated class names present in [annotations]
        """

        # A set comprehension handles de-duplication; sorted() turns it back into a list
        unique_names = {
            cat_id_to_name[annotation['category_id']] for annotation in annotations}
        return sorted(unique_names)


    @staticmethod
    def order_db_keys(db):
        """
        Copies the four standard top-level fields of a COCO Camera Traps database
        into an OrderedDict, in the order 'info', 'categories', 'annotations', 'images'.
        json.dump() preserves that order when the result is serialized.

        Only those four keys are copied; all four must be present in [db].

        Args:
            db (dict): a JSON database in the COCO Camera Trap format

        Returns:
            dict: an OrderedDict holding the four standard fields of [db], ordered
            for readability
        """

        key_order = ('info', 'categories', 'annotations', 'images')
        reordered = OrderedDict()
        for key in key_order:
            reordered[key] = db[key]
        return reordered


    @staticmethod
    def group_annotations_by_image_field(db_indexed, image_field='seq_id'):
        """
        Buckets every annotation in an IndexedJsonDb according to the value of one
        field of the image the annotation belongs to.  Typically used to collect all
        the annotations associated with a sequence.

        Args:
            db_indexed (IndexedJsonDb): an initialized IndexedJsonDb, typically loaded from a
                COCO Camera Traps .json file
            image_field (str, optional): the image field to group by (defaults
                to 'seq_id'); every image in the database must have this field

        Returns:
            dict: a defaultdict mapping each field value (typically a sequence ID string)
            to the list of annotations on images with that value
        """

        # Resolve the grouping value for every image up front
        field_value_by_image_id = {
            image_id: image[image_field]
            for image_id, image in db_indexed.image_id_to_image.items()}

        grouped = defaultdict(list)
        for image_annotations in db_indexed.image_id_to_annotations.values():
            for annotation in image_annotations:
                group_key = field_value_by_image_id[annotation['image_id']]
                grouped[group_key].append(annotation)
        return grouped


    @staticmethod
    def get_entries_for_locations(db, locations):
        """
        Filters a COCO Camera Traps database down to the images whose location is in
        [locations], along with the annotations that refer to those images.

        Args:
            db (dict): a dict representing a JSON database in the COCO Camera Trap format
            locations (set): a set or list of locations to include; each item is a string

        Returns:
            dict: a dict with just the 'images' and 'annotations' fields, in CCT format
        """

        wanted_locations = set(locations)
        print('Original DB has {} image and {} annotation entries.'.format(
            len(db['images']), len(db['annotations'])))

        kept_images = []
        kept_image_ids = set()
        for image in db['images']:
            # Image locations may not be strings, but the entries in [locations] are
            if str(image['location']) not in wanted_locations:
                continue
            kept_images.append(image)
            kept_image_ids.add(image['id'])

        kept_annotations = [
            annotation for annotation in db['annotations']
            if annotation['image_id'] in kept_image_ids]

        new_db = { 'images': kept_images, 'annotations': kept_annotations }
        print(
            'New DB has {} image and {} annotation entries.'.format(
            len(new_db['images']), len(new_db['annotations'])))
        return new_db
153
+
154
+
155
class IndexedJsonDb:
    """
    Wrapper for a COCO Camera Traps database.

    Handles the boilerplate dictionary creation that we do almost every time we load
    a .json database: category ID <--> name, filename --> image ID, image ID --> image,
    and image ID --> annotations.
    """

    def __init__(self,
                 json_filename,
                 b_normalize_paths=False,
                 filename_replacements=None,
                 b_convert_classes_to_lower=True,
                 b_force_forward_slashes=True):
        """
        Constructor for IndexedJsonDb that loads from a .json file or CCT-formatted dict.

        When an already-loaded dict is supplied, it is stored by reference and the
        category names and image filenames inside it are modified in place.

        Filenames are processed in this order: os.path.normpath (if requested),
        backslash --> forward slash conversion (if requested), then custom replacements.

        Args:
            json_filename (str, os.PathLike, or dict): filename to load, or an
                already-loaded dict
            b_normalize_paths (bool, optional): whether to invoke os.path.normpath on
                all filenames
            filename_replacements (dict, optional): a set of string --> string mappings
                that will trigger replacements in all filenames, typically used to remove
                leading folders
            b_convert_classes_to_lower (bool, optional): whether to convert all class
                names to lowercase
            b_force_forward_slashes (bool, optional): whether to convert backslashes to
                forward slashes in all path names
        """

        # Treat pathlib.Path (or any other os.PathLike) the same way as a string filename;
        # anything else is assumed to be an already-loaded database
        if isinstance(json_filename, (str, os.PathLike)):
            with open(json_filename) as f:
                self.db = json.load(f)
        else:
            self.db = json_filename

        assert 'images' in self.db, (
            f'Could not find image list in file {json_filename}, are you sure '
            'this is a COCO camera traps file?')

        if b_convert_classes_to_lower:
            # Convert classnames to lowercase to simplify comparisons later
            for c in self.db['categories']:
                c['name'] = c['name'].lower()

        # Normalize paths to simplify comparisons later
        if b_normalize_paths:
            for im in self.db['images']:
                im['file_name'] = os.path.normpath(im['file_name'])

        if b_force_forward_slashes:
            for im in self.db['images']:
                im['file_name'] = im['file_name'].replace('\\','/')

        if filename_replacements is not None:
            # Make custom replacements in filenames, typically used to
            # accommodate changes in root paths after DB construction
            for s, r in filename_replacements.items():
                for im in self.db['images']:
                    im['file_name'] = im['file_name'].replace(s, r)

        ### Build useful mappings to facilitate working with the DB

        # Category ID <--> name
        #
        # If two categories share an ID or a name (e.g. after lowercasing), the later
        # entry in the category list wins.
        self.cat_id_to_name = {
            cat['id']: cat['name'] for cat in self.db['categories']}
        self.cat_name_to_id = {
            cat['name']: cat['id'] for cat in self.db['categories']}

        # Image filename --> ID
        self.filename_to_id = {
            im['file_name']: im['id'] for im in self.db['images']}

        # Image ID --> image object
        self.image_id_to_image = {im['id']: im for im in self.db['images']}

        # Image ID --> annotations
        #
        # Each image can potentially have multiple annotations, hence using lists
        self.image_id_to_annotations = defaultdict(list)
        for ann in self.db['annotations']:
            self.image_id_to_annotations[ann['image_id']].append(ann)

    # ...__init__


    def get_annotations_for_image(self, image):
        """
        Finds all the annotations associated with the image dict [image].

        Args:
            image (dict): an image dict loaded from a CCT .json file.  Only the 'id' field
                is used.

        Returns:
            list: list of annotations associated with this image.  Returns None if the db
            has not been loaded, or [] if no annotations are available for this image.
        """

        if self.db is None:
            return None

        # Test membership explicitly, rather than indexing, so that looking up an
        # unannotated image doesn't add an empty entry to the defaultdict
        if image['id'] not in self.image_id_to_annotations:
            return []

        return self.image_id_to_annotations[image['id']]


    def get_classes_for_image(self, image):
        """
        Returns a list of the unique class names associated with [image], ordered by
        category ID.

        Args:
            image (dict): an image dict loaded from a CCT .json file.  Only the 'id' field
                is used.

        Returns:
            list: list of class names associated with this image.  Returns None if the db
            has not been loaded, or [] if no annotations are available for this image.
        """

        if self.db is None:
            return None

        # Test membership explicitly, rather than indexing, so that looking up an
        # unannotated image doesn't add an empty entry to the defaultdict
        if image['id'] not in self.image_id_to_annotations:
            return []

        image_annotations = self.image_id_to_annotations[image['id']]
        class_ids = sorted({ann['category_id'] for ann in image_annotations})
        return [self.cat_id_to_name[class_id] for class_id in class_ids]
291
+
292
+ # ...class IndexedJsonDb
293
+
294
class SequenceOptions:
    """
    Settings that control how images are grouped into time-based sequences.
    """

    def __init__(self):

        #: Maximum gap (in seconds) between consecutive images that still belong to
        #: the same sequence; a larger gap starts a new sequence.
        self.episode_interval_seconds = 60.0

        #: What to do with datetimes that can't be parsed: 'error' or 'none'
        self.datetime_conversion_failure_behavior = 'none'

        #: Whether to print additional debug output
        self.verbose = False
309
+
310
+
311
+ #%% Functions
312
+
313
def write_object_with_serialized_datetimes(d,json_fn):
    """
    Serializes [d] to the .json file [json_fn], converting any Python datetime or
    date objects to ISO-format strings along the way.

    Args:
        d (obj): the object to write, typically a dict
        json_fn (str): the output filename
    """

    def _to_iso_string(value):
        """
        Fallback serializer for json.dump(); datetimes are written like:

        2022-12-31T09:52:50
        """

        if not isinstance(value, (datetime.datetime, datetime.date)):
            raise TypeError('Object {} (type {}) not serializable'.format(
                str(value),type(value)))
        return value.isoformat()

    with open(json_fn,'w') as output_file:
        json.dump(d,output_file,indent=1,default=_to_iso_string)
334
+
335
+
336
def parse_datetimes_from_cct_image_list(images,
                                        conversion_failure_behavior='error',
                                        verbose=False):
    """
    Given the "images" field from a COCO camera traps dictionary, converts all
    string-formatted datetime fields to Python datetimes, making reasonable assumptions
    about datetime representations.  Modifies [images] in place.

    Images with no 'datetime' field, and images whose 'datetime' field is already a
    Python datetime, are left untouched.

    Args:
        images (list): a list of dicts in CCT images format
        conversion_failure_behavior (str, optional): determines what happens on a failed
            conversion; can be "error" (raise an error), "str" (leave as a string), or
            "none" (convert to None)
        verbose (bool, optional): enable additional debug output

    Returns:
        images: the input list, with datetimes converted (after modifying in place)

    Raises:
        ValueError: if a datetime can't be parsed and [conversion_failure_behavior]
            is "error"
    """

    # Depending on the installed dateutil version, "import dateutil" alone may not load
    # the parser submodule.  Without this import, the resulting AttributeError would be
    # caught by the handler below and every datetime would be reported as unparseable.
    import dateutil.parser

    assert isinstance(images,list)

    print('Parsing datetimes from CCT image list...')

    for im in tqdm(images):

        if 'datetime' not in im:
            continue
        if isinstance(im['datetime'],datetime.datetime):
            continue
        try:
            dt = dateutil.parser.parse(im['datetime'])
            im['datetime'] = dt
        except Exception as e:
            s = 'could not parse datetime {} from {}: {}'.format(
                str(im['datetime']),im['file_name'],str(e))
            if conversion_failure_behavior == 'error':
                # Chain the original exception so the parser's traceback isn't lost
                raise ValueError(s) from e
            elif conversion_failure_behavior == 'str':
                # Leave the unparseable value as it is
                if verbose:
                    print('Warning: {}'.format(s))
            elif conversion_failure_behavior == 'none':
                if verbose:
                    print('Warning: {}'.format(s))
                im['datetime'] = None

    # ...for each image

    return images
385
+
386
+ # ...def parse_datetimes_from_cct_image_list(...)
387
+
388
+
389
def parse_datetimes_from_cct_dict(d,conversion_failure_behavior='error'):
    """
    Given a COCO camera traps dictionary that may just have been loaded from file,
    converts all string-formatted datetime fields to Python datetimes, making
    reasonable assumptions about datetime representations.  Modifies [d] in place
    if [d] is supplied as a dict.

    Args:
        d (dict or str): a dict in CCT format or a filename pointing to a CCT .json file
        conversion_failure_behavior (str, optional): determines what happens on a failed
            conversion; can be "error" (raise an error), "str" (leave as a string), or
            "none" (convert to None)

    Returns:
        dict: the CCT dict with converted datetimes.
    """

    if isinstance(d,str):
        assert os.path.isfile(d), 'Could not find .json file {}'.format(d)
        with open(d,'r') as f:
            d = json.load(f)

    images = d['images']

    # Modifies in place.  The failure behavior has to be forwarded explicitly; otherwise
    # the helper falls back to its own default no matter what the caller asked for.
    _ = parse_datetimes_from_cct_image_list(
        images,
        conversion_failure_behavior=conversion_failure_behavior)

    return d
417
+
418
+ # ...def parse_datetimes_from_cct_dict(...)
419
+
420
+
421
def create_sequences(image_info,options=None):
    """
    Synthesizes episodes/sequences/bursts for the images in [image_info].

    Images are grouped by location and sorted by datetime within each location.  An image
    starts a new sequence when it is the first image at its location, when it or the
    preceding image has no valid datetime, or when it was captured more than
    [options.episode_interval_seconds] seconds after the preceding image.

    Modifies [image_info] in place, populating the 'seq_id', 'seq_num_frames', and 'frame_num'
    fields for each image.

    Args:
        image_info (str, dict, or list): a dict in CCT format, a CCT .json file, or just the
            'images' component of a CCT dataset (a list of dicts with fields 'file_name' (str),
            'datetime' (datetime), and 'location' (str)).
        options (SequenceOptions, optional): options parameterizing the assembly of images into
            sequences; see the SequenceOptions class for details.

    Returns:
        image_info: if [image_info] is passed as a list, returns the list, otherwise returns
        a CCT-formatted dict.

    Raises:
        ValueError: if [image_info] is not a str, dict, or list
    """

    if options is None:
        options = SequenceOptions()

    to_return = None

    if isinstance(image_info,list):
        to_return = image_info

    elif isinstance(image_info,str):
        print('Reading image information from {}'.format(image_info))
        with open(image_info,'r') as f:
            d = json.load(f)
        to_return = d
        image_info = d['images']

    elif isinstance(image_info,dict):
        to_return = image_info
        image_info = image_info['images']

    else:
        raise ValueError('Unrecognized type for [image_info]')

    # Modifies the images in place
    _ = parse_datetimes_from_cct_image_list(image_info,
        conversion_failure_behavior=options.datetime_conversion_failure_behavior,
        verbose=options.verbose)

    # After parsing, every datetime has to be either a real datetime or None; values left
    # as strings (the 'str' failure behavior) are not supported here.
    n_invalid_datetimes = 0
    for im in image_info:
        if not isinstance(im['datetime'],datetime.datetime):
            assert im['datetime'] is None, 'At this point, datetimes should be valid or None'
            n_invalid_datetimes += 1
    if n_invalid_datetimes > 0:
        print('Warning: {} of {} images have invalid datetimes'.format(
            n_invalid_datetimes,len(image_info)))

    # Group images by location in a single pass, rather than re-scanning the whole
    # image list once per location
    location_to_images = defaultdict(list)
    for im in image_info:
        location_to_images[im['location']].append(im)

    print('Found {} locations'.format(len(location_to_images)))
    locations = sorted(location_to_images)

    all_sequences = set()

    for location in tqdm(locations):

        images_this_location = location_to_images[location]

        # Sorting datetimes fails when there are None's in the list.  So instead of sorting datetimes
        # directly, sort tuples with a boolean for none-ness, then the datetime itself.
        #
        # https://stackoverflow.com/questions/18411560/sort-list-while-pushing-none-values-to-the-end
        sorted_images_this_location = sorted(images_this_location,
            key = lambda im: (im['datetime'] is None,im['datetime']))

        sequence_id_to_images_this_location = defaultdict(list)

        current_sequence_id = None
        next_frame_number = 0
        next_sequence_number = 0
        previous_datetime = None

        for im in sorted_images_this_location:

            current_datetime = im['datetime']

            # The first image at a location always starts a sequence, as does any image
            # that can't be compared to its predecessor because either datetime is missing
            if (previous_datetime is None) or (current_datetime is None):
                start_new_sequence = True
            else:
                delta = (current_datetime - previous_datetime).total_seconds()
                start_new_sequence = (delta > options.episode_interval_seconds)

            if start_new_sequence:
                next_frame_number = 0
                current_sequence_id = 'location_{}_sequence_index_{}'.format(
                    location,str(next_sequence_number).zfill(5))
                next_sequence_number = next_sequence_number + 1
                assert current_sequence_id not in all_sequences
                all_sequences.add(current_sequence_id)

            im['seq_id'] = current_sequence_id
            im['seq_num_frames'] = None
            im['frame_num'] = next_frame_number
            sequence_id_to_images_this_location[current_sequence_id].append(im)
            next_frame_number = next_frame_number + 1

            # If this was an invalid datetime, this will record the previous datetime
            # as None, which will force the next image to start a new sequence.
            previous_datetime = current_datetime

        # ...for each image in this location

        # Fill in seq_num_frames, now that the size of every sequence is known
        for images_this_sequence in sequence_id_to_images_this_location.values():
            assert len(images_this_sequence) > 0
            for im in images_this_sequence:
                im['seq_num_frames'] = len(images_this_sequence)

    # ...for each location

    print('Created {} sequences from {} images'.format(len(all_sequences),len(image_info)))

    return to_return
553
+
554
+ # ...def create_sequences(...)
555
+
556
+
557
+ #%% Test drivers
558
+
559
# Interactive test cell; the "if False" guard means nothing here runs on import
if False:

    #%% Parse the datetimes in a local test file

    test_json_fn = r'g:\temp\test.json'
    d = parse_datetimes_from_cct_dict(test_json_fn,conversion_failure_behavior='error')