megadetector 5.0.21-py3-none-any.whl → 5.0.23-py3-none-any.whl

This diff shows the contents of publicly available package versions as released to a supported public registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.

Potentially problematic release: this version of megadetector might be problematic.

Files changed (37)
  1. megadetector/data_management/cct_json_utils.py +143 -7
  2. megadetector/data_management/cct_to_md.py +12 -5
  3. megadetector/data_management/databases/integrity_check_json_db.py +83 -77
  4. megadetector/data_management/importers/raic_csv_to_md_results.py +416 -0
  5. megadetector/data_management/importers/zamba_results_to_md_results.py +1 -2
  6. megadetector/data_management/lila/create_lila_test_set.py +25 -11
  7. megadetector/data_management/lila/download_lila_subset.py +9 -2
  8. megadetector/data_management/lila/generate_lila_per_image_labels.py +3 -2
  9. megadetector/data_management/lila/test_lila_metadata_urls.py +5 -1
  10. megadetector/data_management/read_exif.py +10 -14
  11. megadetector/data_management/rename_images.py +1 -1
  12. megadetector/detection/process_video.py +14 -3
  13. megadetector/detection/pytorch_detector.py +15 -3
  14. megadetector/detection/run_detector.py +4 -3
  15. megadetector/detection/run_detector_batch.py +2 -2
  16. megadetector/detection/run_inference_with_yolov5_val.py +121 -13
  17. megadetector/detection/video_utils.py +21 -10
  18. megadetector/postprocessing/classification_postprocessing.py +1 -1
  19. megadetector/postprocessing/compare_batch_results.py +931 -142
  20. megadetector/postprocessing/detector_calibration.py +243 -45
  21. megadetector/postprocessing/md_to_coco.py +85 -20
  22. megadetector/postprocessing/postprocess_batch_results.py +0 -1
  23. megadetector/postprocessing/validate_batch_results.py +65 -15
  24. megadetector/taxonomy_mapping/map_new_lila_datasets.py +15 -12
  25. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +1 -1
  26. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +3 -1
  27. megadetector/utils/ct_utils.py +71 -14
  28. megadetector/utils/md_tests.py +9 -1
  29. megadetector/utils/path_utils.py +14 -7
  30. megadetector/utils/process_utils.py +9 -3
  31. megadetector/utils/write_html_image_list.py +5 -1
  32. megadetector/visualization/visualization_utils.py +211 -87
  33. {megadetector-5.0.21.dist-info → megadetector-5.0.23.dist-info}/METADATA +19 -18
  34. {megadetector-5.0.21.dist-info → megadetector-5.0.23.dist-info}/RECORD +37 -36
  35. {megadetector-5.0.21.dist-info → megadetector-5.0.23.dist-info}/WHEEL +1 -1
  36. {megadetector-5.0.21.dist-info → megadetector-5.0.23.dist-info}/LICENSE +0 -0
  37. {megadetector-5.0.21.dist-info → megadetector-5.0.23.dist-info}/top_level.txt +0 -0
megadetector/data_management/importers/raic_csv_to_md_results.py (new file)

@@ -0,0 +1,416 @@
+ """
+
+ raic_csv_to_md_results.py
+
+ Converts classification+detection results in the .csv format provided to the Snapshot
+ Serengeti program by the RAIC team to the MD results format.
+
+ The input format is two .csv files:
+
+ * One with results, with columns [unnamed], filename, category, x_center, y_center,
+   width, height, confidence, datetime
+
+ * One with class IDs and names, with columns CLASS, SPECIES
+
+ Filenames are relative paths to .txt files, but with slashes replaced by underscores,
+ e.g. this file:
+
+ B04_R1/I__00122.JPG
+
+ ...appears in the .csv file as:
+
+ B04_R1_I__00122.txt
+
+ Image coordinates are in absolute floating-point units, with an upper-left origin.
+
+ Unknowns at the time I'm writing this:
+
+ * I don't know what the unnamed column is, but it looks like an ID I can safely ignore.
+
+ * I believe that MegaDetector was run, then a classifier was run, but there is a
+   single "confidence" column in the output. I am writing out the results as if they were a
+   single multi-class detector. This is suspicious given the lack of a human class, which
+   suggests that this is intended to be run in conjunction with MD.
+
+ * There is no concept of "empty" in this file format, so by default I assume that images with
+   no annotations in the .csv file were processed and determined to have no detections above
+   some (unknown) threshold.
+
+ * I'm not currently handling EXIF rotations, as part of the effort to simplify this file
+   for conversion to R (see below).
+
+ Note to self: this file should not take dependencies on other components of the MD
+ repo, at the risk of creating some redundancy. I am going to convert this to R,
+ which will be easier if it's not using any non-standard libraries. Anything in the
+ "interactive driver" cells gets a pass.
+
+ """
+
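The two transformations the header describes (filename flattening, and converting absolute center-format boxes to MD's normalized [x_min, y_min, width, height]) can be sanity-checked in isolation; here is a worked sketch with invented numbers:

```python
import os

# Filename flattening: the .csv refers to 'B04_R1/I__00122.JPG' as 'B04_R1_I__00122.txt',
# so results are matched to images via the flattened, extension-less base name
fn_flattened_base = os.path.splitext('B04_R1/I__00122.JPG'.replace('/', '_'))[0]
assert fn_flattened_base == 'B04_R1_I__00122'

# Box conversion: absolute center-format coordinates (upper-left origin) to the
# MD format's normalized [x_min, y_min, width, height]; all values are hypothetical
image_w, image_h = 2048, 1536
x_center, y_center, box_w, box_h = 1024.0, 768.0, 512.0, 384.0
x_min = x_center - box_w / 2.0
y_min = y_center - box_h / 2.0
bbox_normalized = [x_min / image_w, y_min / image_h, box_w / image_w, box_h / image_h]
assert bbox_normalized == [0.375, 0.375, 0.25, 0.25]
```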
+ #%% Imports and constants
+
+ import os
+ import glob
+ import json
+ import sys
+ import argparse
+
+ import pandas as pd
+ from PIL import Image
+
+
+ #%% Functions from the MD python package
+
+ # ...that I'm choosing to copy and paste to facilitate a conversion of this
+ # script to R.
+
+ # Should all be lower-case
+ IMG_EXTENSIONS = ('.jpg', '.jpeg', '.gif', '.png', '.tif', '.tiff', '.bmp')
+
+ def _is_image_file(s, img_extensions=IMG_EXTENSIONS):
+     """
+     Checks a file's extension against a hard-coded set of image file
+     extensions. Uses case-insensitive comparison.
+
+     Does not check whether the file exists, only determines whether the filename
+     implies it's an image file.
+
+     Args:
+         s (str): filename to evaluate for image-ness
+         img_extensions (list, optional): list of known image file extensions
+
+     Returns:
+         bool: True if [s] appears to be an image file, else False
+     """
+
+     ext = os.path.splitext(s)[1]
+     return ext.lower() in img_extensions
+
+
+ def _find_image_strings(strings):
+     """
+     Given a list of strings that are potentially image file names, looks for
+     strings that actually look like image file names (based on extension).
+
+     Args:
+         strings (list): list of filenames to check for image-ness
+
+     Returns:
+         list: the subset of [strings] that appear to be image filenames
+     """
+
+     return [s for s in strings if _is_image_file(s)]
+
+
+ def _find_images(dirname,
+                  recursive=False,
+                  return_relative_paths=False,
+                  convert_slashes=True):
+     """
+     Finds all files in a directory that look like image file names. Returns
+     absolute paths unless return_relative_paths is set. Uses the OS-native
+     path separator unless convert_slashes is set, in which case will always
+     use '/'.
+
+     Args:
+         dirname (str): the folder to search for images
+         recursive (bool, optional): whether to search recursively
+         return_relative_paths (bool, optional): return paths that are relative
+             to [dirname], rather than absolute paths
+         convert_slashes (bool, optional): force forward slashes in return values
+
+     Returns:
+         list: list of image filenames found in [dirname]
+     """
+
+     assert os.path.isdir(dirname), '{} is not a folder'.format(dirname)
+
+     if recursive:
+         strings = glob.glob(os.path.join(dirname, '**', '*.*'), recursive=True)
+     else:
+         strings = glob.glob(os.path.join(dirname, '*.*'))
+
+     image_files = _find_image_strings(strings)
+
+     if return_relative_paths:
+         image_files = [os.path.relpath(fn,dirname) for fn in image_files]
+
+     image_files = sorted(image_files)
+
+     if convert_slashes:
+         image_files = [fn.replace('\\', '/') for fn in image_files]
+
+     return image_files
+
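For orientation, this is how the conversion function below uses the helper above; the folder path here is hypothetical:

```python
# Hypothetical folder; with these arguments, returns sorted relative paths
# with forward slashes, e.g. ['B04_R1/I__00122.JPG', ...]
image_files = _find_images('/data/serengeti_images',
                           recursive=True,
                           return_relative_paths=True,
                           convert_slashes=True)
```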
+ #%% Main conversion function
+
+ def raic_csv_to_md_results(result_csv_file,
+                            class_mapping_csv_file,
+                            image_folder,
+                            output_file=None,
+                            unannotated_image_handling='empty'):
+     """
+     Converts a pair of .csv files (see file header for details) to MD results format.
+
+     Currently errors if image filenames are ambiguous, or if any images referred to in
+     the results are not available.
+
+     Args:
+         result_csv_file (str): the results file to read (.csv)
+         class_mapping_csv_file (str): the class mapping file (.csv)
+         image_folder (str): the folder containing all the images referred to in
+             [result_csv_file]
+         output_file (str, optional): the .json file to which we should write results.
+             Defaults to [result_csv_file].json
+         unannotated_image_handling (str, optional): can be "empty" (default) to assume
+             that images without annotations are empty, "warning", "error", or "skip"
+
+     Returns:
+         str: the output file written, identical to [output_file] if [output_file] was not None
+     """
+
+     # Validate arguments
+     assert os.path.isfile(result_csv_file), \
+         'Result file {} not found'.format(result_csv_file)
+     assert os.path.isfile(class_mapping_csv_file), \
+         'Class mapping file {} not found'.format(class_mapping_csv_file)
+     assert os.path.isdir(image_folder), \
+         'Image folder {} not found'.format(image_folder)
+
+     if output_file is None:
+         output_file = result_csv_file + '.json'
+
+     image_files_relative = _find_images(image_folder,
+                                         recursive=True,
+                                         return_relative_paths=True,
+                                         convert_slashes=True)
+     image_file_base_flattened_to_image_file_relative = {}
+     for fn in image_files_relative:
+         # Convert, e.g., B04_R1/I__00108.JPG to B04_R1_I__00108
+         fn_flattened = fn.replace('/','_')
+         fn_flattened_base = os.path.splitext(fn_flattened)[0]
+         image_file_base_flattened_to_image_file_relative[fn_flattened_base] = fn
+
+     # Read the .csv files
+     df_results = pd.read_csv(result_csv_file)
+     df_class_mapping = pd.read_csv(class_mapping_csv_file)
+
+     assert 'CLASS' in df_class_mapping.columns and 'SPECIES' in df_class_mapping.columns, \
+         'Unexpected column names in class mapping file {}'.format(class_mapping_csv_file)
+
+     category_id_to_name = {}
+     for i_row,row in df_class_mapping.iterrows():
+         class_id = int(row['CLASS'])
+         assert class_id not in category_id_to_name, \
+             'Class ID {} occurs more than once in class mapping file {}'.format(
+                 class_id,class_mapping_csv_file)
+         category_id_to_name[class_id] = row['SPECIES']
+
+     if len(category_id_to_name) != len(set(category_id_to_name.values())):
+         print('Warning: one or more categories are used more than once in class mapping file {}'.format(
+             class_mapping_csv_file))
+
+     # Convert results
+
+     fn_relative_to_im = {}
+
+     # i_row = 0; row = df_results.iloc[i_row]
+     for i_row,row in df_results.iterrows():
+
+         # Map the .txt filename base to a relative path
+         bn = row['filename']
+         assert bn.lower().endswith('.txt')
+         bn_no_ext = os.path.splitext(bn)[0]
+         assert bn_no_ext in image_file_base_flattened_to_image_file_relative, \
+             'No image found for result row {}'.format(row['filename'])
+
+         image_fn_relative = image_file_base_flattened_to_image_file_relative[bn_no_ext]
+
+         # Have we seen another detection for this image?
+         if image_fn_relative in fn_relative_to_im:
+
+             im = fn_relative_to_im[image_fn_relative]
+
+         # If not, load this image so we can read its size
+         else:
+
+             image_fn_abs = os.path.join(image_folder,image_fn_relative)
+             image = Image.open(image_fn_abs)
+             w = image.size[0]
+             h = image.size[1]
+
+             im = {}
+             im['file'] = image_fn_relative
+             im['width'] = w
+             im['height'] = h
+             im['detections'] = []
+             im['datetime'] = str(row['datetime'])
+             fn_relative_to_im[image_fn_relative] = im
+
+         # Convert annotation
+         x_center_abs = row['x_center']
+         y_center_abs = row['y_center']
+         box_width_abs = row['width']
+         box_height_abs = row['height']
+
+         # Convert to relative coordinates
+         box_left_abs = x_center_abs - (box_width_abs/2.0)
+         box_top_abs = y_center_abs - (box_height_abs/2.0)
+         bbox_normalized = [box_left_abs/im['width'],
+                            box_top_abs/im['height'],
+                            box_width_abs/im['width'],
+                            box_height_abs/im['height']]
+
+         category_id = str(int(row['category']))
+         confidence = row['confidence']
+         assert isinstance(confidence,float) and confidence <= 1.0 and confidence >= 0.0
+
+         det = {}
+         im['detections'].append(det)
+         det['category'] = category_id
+         det['conf'] = confidence
+         det['bbox'] = bbox_normalized
+
+     # ...for each row
+
+     n_empty_images = 0
+
+     # Handle images without annotations
+     for fn_relative in image_files_relative:
+
+         if fn_relative not in fn_relative_to_im:
+             if unannotated_image_handling == 'empty':
+                 im = {}
+                 im['file'] = fn_relative
+                 im['detections'] = []
+                 fn_relative_to_im[fn_relative] = im
+                 n_empty_images += 1
+                 # Don't bother to read width and height here
+             elif unannotated_image_handling == 'warning':
+                 print('Warning: image {} is not represented in the .csv results file'.format(fn_relative))
+             elif unannotated_image_handling == 'error':
+                 raise ValueError('Image {} is not represented in the .csv results file'.format(fn_relative))
+             elif unannotated_image_handling == 'skip':
+                 continue
+
+     # ...for each image file
+
+     if n_empty_images > 0:
+         print('Warning: assuming {} of {} images without annotations are empty'.format(
+             n_empty_images,len(image_files_relative)))
+
+     images = list(fn_relative_to_im.values())
+
+     # The MD output format uses string-ints for category IDs; right now we have ints
+     detection_categories = {}
+     for category_id_int in category_id_to_name:
+         detection_categories[str(category_id_int)] = category_id_to_name[category_id_int]
+
+     info = {}
+     info['format_version'] = '1.4'
+     info['detector'] = 'RAIC .csv converter'
+
+     d = {}
+     d['images'] = images
+     d['detection_categories'] = detection_categories
+     d['info'] = info
+
+     with open(output_file,'w') as f:
+         json.dump(d,f,indent=1)
+
+     return output_file
+
+ # ...def raic_csv_to_md_results(...)
+
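For reference, a hypothetical example of the .json this function writes, matching the structure assembled above (file name, image size, category name, and values all invented):

```python
example_output = {
    'images': [{
        'file': 'B04_R1/I__00122.JPG',
        'width': 2048,
        'height': 1536,
        'detections': [{'category': '1',
                        'conf': 0.92,
                        'bbox': [0.375, 0.375, 0.25, 0.25]}],
        'datetime': '2024-06-01 12:30:00'
    }],
    'detection_categories': {'1': 'wildebeest'},
    'info': {'format_version': '1.4', 'detector': 'RAIC .csv converter'}
}
```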
+
+ #%% Interactive driver
+
+ if False:
+
+     pass
+
+     #%% Test conversion
+
+     base_folder = r'G:\temp\S24_B04_R1_output_annotations_for_Dan'
+     result_csv_file = os.path.join(base_folder,'S24_B04_R1_output_annotations_for_Dan.csv')
+     class_mapping_csv_file = os.path.join(base_folder,'categories_key.csv')
+
+     # This is wrong; B04_R1 has to be part of the image paths
+     # image_folder = os.path.join(base_folder,'B04_R1')
+
+     image_folder = base_folder
+
+     output_file = None
+     unannotated_image_handling = 'empty'
+
+     output_file = raic_csv_to_md_results(result_csv_file=result_csv_file,
+                                          class_mapping_csv_file=class_mapping_csv_file,
+                                          image_folder=image_folder,
+                                          output_file=output_file,
+                                          unannotated_image_handling=unannotated_image_handling)
+
+     #%% Validate results file
+
+     from megadetector.postprocessing.validate_batch_results import \
+         ValidateBatchResultsOptions, validate_batch_results
+
+     validation_options = ValidateBatchResultsOptions()
+     validation_options.check_image_existence = True
+     validation_options.relative_path_base = image_folder
+     validation_options.return_data = True
+
+     results = validate_batch_results(output_file,validation_options)
+     assert len(results['validation_results']['errors']) == 0
+     assert len(results['validation_results']['warnings']) == 0
+
+
+     #%% Preview results
+
+     from megadetector.postprocessing.postprocess_batch_results import \
+         PostProcessingOptions, process_batch_results
+
+     postprocessing_options = PostProcessingOptions()
+
+     postprocessing_options.md_results_file = output_file
+     postprocessing_options.output_dir = r'g:\temp\serengeti-conversion-preview'
+     postprocessing_options.image_base_dir = image_folder
+     postprocessing_options.confidence_threshold = 0.2
+     postprocessing_options.num_images_to_sample = None
+     postprocessing_options.viz_target_width = 1280
+     postprocessing_options.line_thickness = 4
+     postprocessing_options.parallelize_rendering_n_cores = 10
+     postprocessing_options.parallelize_rendering_with_threads = True
+
+     postprocessing_results = process_batch_results(postprocessing_options)
+
+     from megadetector.utils.path_utils import open_file
+     open_file(postprocessing_results.output_html_file)
+
+
+ #%% Command-line driver
+
+ def main():
+
+     parser = argparse.ArgumentParser()
+     parser.add_argument('result_csv_file', type=str,
+                         help='csv file containing AI results')
+     parser.add_argument('class_mapping_csv_file', type=str,
+                         help='csv file containing class mappings (with columns CLASS, SPECIES)')
+     parser.add_argument('image_folder', type=str,
+                         help='folder containing the images referred to in [result_csv_file]')
+     parser.add_argument('--output_file', type=str, default=None,
+                         help='.json file to which we should write results (defaults to [result_csv_file].json)')
+
+     if len(sys.argv[1:]) == 0:
+         parser.print_help()
+         parser.exit()
+
+     args = parser.parse_args()
+     raic_csv_to_md_results(result_csv_file=args.result_csv_file,
+                            class_mapping_csv_file=args.class_mapping_csv_file,
+                            image_folder=args.image_folder,
+                            output_file=args.output_file)
+
+ if __name__ == '__main__':
+     main()
megadetector/data_management/cct_to_md.py

@@ -14,8 +14,7 @@
   corrected_label

   Because the MD results file fundamentally stores detections, what we'll
- actually do is created bogus detections that fill the entire image. Detection
- coordinates are not currently used in Timelapse video video anyway.
+ actually do is create bogus detections that fill the entire image.

   There is no special handling of empty/blank categories; because these results are
   based on a classifier, rather than a detector (where "blank" would be the absence of
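A note on the "bogus detections" mentioned above: MD boxes are normalized [x_min, y_min, width, height], so a detection that fills the entire image is simply [0, 0, 1, 1]. A minimal sketch, with a hypothetical category and confidence:

```python
# A full-image pseudo-detection carrying a classifier label (values hypothetical)
det = {'category': '1', 'conf': 0.95, 'bbox': [0.0, 0.0, 1.0, 1.0]}
```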
megadetector/data_management/lila/create_lila_test_set.py

@@ -16,8 +16,6 @@ import random
  from megadetector.data_management.lila.lila_common import \
      read_lila_metadata, read_metadata_file_for_dataset

- from megadetector.utils.url_utils import download_url
-
  n_empty_images_per_dataset = 1
  n_non_empty_images_per_dataset = 1

@@ -48,6 +46,8 @@ for ds_name in metadata_table.keys():

  #%% Choose images from each dataset

+ # Takes ~60 seconds
+
  # ds_name = (list(metadata_table.keys()))[0]
  for ds_name in metadata_table.keys():

@@ -102,10 +102,12 @@ for ds_name in metadata_table.keys():

  #%% Convert to URLs

+ preferred_cloud = 'gcp'
+
  # ds_name = (list(metadata_table.keys()))[0]
  for ds_name in metadata_table.keys():

-     base_url = metadata_table[ds_name]['image_base_url']
+     base_url = metadata_table[ds_name]['image_base_url_' + preferred_cloud]
      assert not base_url.endswith('/')

      # Retrieve image file names

@@ -123,14 +125,14 @@ for ds_name in metadata_table.keys():
  # ...for each dataset


- #%% Download those image files
+ #%% Download image files (prep)
+
+ url_to_target_file = {}

- # TODO: trivially parallelizable
- #
  # ds_name = (list(metadata_table.keys()))[0]
  for ds_name in metadata_table.keys():

-     base_url = metadata_table[ds_name]['image_base_url']
+     base_url = metadata_table[ds_name]['image_base_url_' + preferred_cloud]
      assert not base_url.endswith('/')
      base_url += '/'

@@ -142,11 +144,23 @@ for ds_name in metadata_table.keys():
          assert base_url in url
          output_file_relative = ds_name.lower().replace(' ','_') + '_' + url.replace(base_url,'').replace('/','_').replace('\\','_')
          output_file_absolute = os.path.join(output_dir,output_file_relative)
-         try:
-             download_url(url, destination_filename=output_file_absolute, force_download=False, verbose=True)
-         except Exception as e:
-             print('\n*** Error downloading {} ***\n{}'.format(url,str(e)))
+         url_to_target_file[url] = output_file_absolute

      # ...for each url

  # ...for each dataset
+
+
+ #%% Download image files (execution)
+
+ from megadetector.utils.url_utils import parallel_download_urls
+
+ download_results = parallel_download_urls(url_to_target_file,
+                                           verbose=False,
+                                           overwrite=False,
+                                           n_workers=20,
+                                           pool_type='thread')
+
+ # r = download_results[0]
+ for r in download_results:
+     assert r['status'] in ('skipped','success')
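The change above replaces a serial download loop with a two-phase pattern: build a url-to-target mapping, then hand the whole mapping to a thread pool (a natural fit, since downloads are I/O-bound). For readers who want the shape of that pattern without the MD dependency, here is a minimal standard-library sketch; this is an illustration, not the actual parallel_download_urls implementation:

```python
import os
import urllib.request
from multiprocessing.pool import ThreadPool

def _download_one(item):
    # Download a single URL to its target path, skipping files that already exist
    url, target = item
    if os.path.isfile(target):
        return {'url': url, 'status': 'skipped'}
    if os.path.dirname(target):
        os.makedirs(os.path.dirname(target), exist_ok=True)
    urllib.request.urlretrieve(url, target)
    return {'url': url, 'status': 'success'}

def parallel_download(url_to_target_file, n_workers=20):
    # Threads (rather than processes) suit I/O-bound work like downloads
    with ThreadPool(n_workers) as pool:
        return pool.map(_download_one, url_to_target_file.items())
```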
megadetector/data_management/lila/download_lila_subset.py

@@ -22,7 +22,8 @@ for s in lila_base_urls.values():
      assert s.endswith('/')

  # If any of these strings appear in the common name of a species, we'll download that image
- species_of_interest = ['grey fox','gray fox','cape fox','red fox','kit fox']
+ # species_of_interest = ['grey fox','gray fox','cape fox','red fox','kit fox']
+ species_of_interest = ['bear']

  # We'll write images, metadata downloads, and temporary files here
  lila_local_base = os.path.expanduser('~/lila')

@@ -45,7 +46,7 @@ random.seed(0)

  #%% Download and open the giant table of image URLs and labels

- # Takes ~60 seconds to download, unzip, and open
+ # Takes ~2 minutes to download, unzip, and open
  df = read_lila_all_images_file(metadata_dir)


@@ -144,6 +145,12 @@ download_results = parallel_download_urls(url_to_target_file=url_to_target_file,
      pool_type='thread')


+ #%% Open output folder
+
+ from megadetector.utils.path_utils import open_file
+ open_file(output_dir)
+
+
  #%% Scrap

  if False:
megadetector/data_management/lila/generate_lila_per_image_labels.py

@@ -57,6 +57,7 @@ ds_name_to_annotation_level['Channel IslandsCamera Traps'] = 'image'
  ds_name_to_annotation_level['WCS Camera Traps'] = 'sequence'
  ds_name_to_annotation_level['Wellington Camera Traps'] = 'sequence'
  ds_name_to_annotation_level['NACTI'] = 'unknown'
+ ds_name_to_annotation_level['Seattle(ish) Camera Traps'] = 'image'

  known_unmapped_labels = set(['WCS Camera Traps:#ref!'])

@@ -103,7 +104,7 @@ for i_row,row in taxonomy_df.iterrows():

  #%% Process annotations for each dataset

- # Takes several hours
+ # Takes a few hours

  # The order of these headers needs to match the order in which fields are added later in this cell;
  # don't mess with this order.

@@ -173,7 +174,7 @@ with open(output_file,'w',encoding='utf-8',newline='') as f:
      expected_annotation_level = None

      # im = images[10]
-     for i_image,im in enumerate(images):
+     for i_image,im in tqdm(enumerate(images),total=len(images)):

          if (debug_max_images_per_dataset is not None) and (debug_max_images_per_dataset > 0) \
              and (i_image >= debug_max_images_per_dataset):
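One note on the tqdm change above: wrapping enumerate() hides the length of the underlying list (enumerate objects have no __len__), so passing total= is what lets tqdm render a real progress bar rather than a bare counter. A minimal illustration with a stand-in list:

```python
from tqdm import tqdm

images = list(range(1000))  # stand-in for the real image list

# Without total=, tqdm over enumerate() can't show percentage or ETA
for i_image, im in tqdm(enumerate(images), total=len(images)):
    pass
```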
megadetector/data_management/lila/test_lila_metadata_urls.py

@@ -52,6 +52,8 @@ print('Loaded metadata URLs for {} datasets'.format(len(metadata_table)))

  #%% Download and extract metadata and MD results for each dataset

+ # Takes ~60 seconds if everything needs to be downloaded and unzipped
+
  for ds_name in metadata_table.keys():

      # Download the main metadata file for this dataset

@@ -73,10 +75,12 @@ for ds_name in metadata_table.keys():
          json_url=md_results_url,
          force_download=force_download)

+ # ...for each dataset
+

  #%% Build up a list of URLs to test

- # Takes ~15 mins, since it has to open all the giant .json files
+ # Takes ~15 mins, since it has to open all the giant .json files.

  url_to_source = {}

megadetector/data_management/read_exif.py

@@ -16,7 +16,7 @@ path. No attempt is made to be consistent in format across the two approaches.
  import os
  import subprocess
  import json
- from datetime import date, datetime
+ from datetime import datetime

  from multiprocessing.pool import ThreadPool as ThreadPool
  from multiprocessing.pool import Pool as Pool

@@ -27,6 +27,7 @@ from PIL import Image, ExifTags
  from megadetector.utils.path_utils import find_images, is_executable
  from megadetector.utils.ct_utils import args_to_object
  from megadetector.utils.ct_utils import image_file_to_camera_folder
+ from megadetector.data_management.cct_json_utils import write_object_with_serialized_datetimes

  debug_max_images = None

@@ -88,8 +89,8 @@ class ReadExifOptions:

  class ExifResultsToCCTOptions:
      """
-     Options controlling the behavior of exif_results_to_cct() (which reformats the datetime information)
-     extracted by read_exif_from_folder().
+     Options controlling the behavior of exif_results_to_cct() (which reformats the datetime
+     information) extracted by read_exif_from_folder().
      """

      def __init__(self):

@@ -730,9 +731,11 @@ def exif_results_to_cct(exif_results,cct_output_file=None,options=None):

      Args:
          exif_results (str or list): the filename (or loaded list) containing the results
-             from read_exif_from_folder
-         cct_file (str,optional): the filename to which we should write COCO-Camera-Traps-formatted
-             data
+             from read_exif_from_folder
+         cct_output_file (str,optional): the filename to which we should write
+             COCO-Camera-Traps-formatted data
+         options (ExifResultsToCCTOptions, optional): options guiding the generation
+             of the CCT file, particularly location mapping

      Returns:
          dict: a COCO Camera Traps dict (with no annotations).

@@ -811,15 +814,8 @@ def exif_results_to_cct(exif_results,cct_output_file=None,options=None):
      d['annotations'] = []
      d['categories'] = []

-     def json_serialize_datetime(obj):
-         if isinstance(obj, (datetime, date)):
-             return obj.isoformat()
-         raise TypeError('Object {} (type {}) not serializable'.format(
-             str(obj),type(obj)))
-
      if cct_output_file is not None:
-         with open(cct_output_file,'w') as f:
-             json.dump(d,f,indent=1,default=json_serialize_datetime)
+         write_object_with_serialized_datetimes(d,cct_output_file)

      return d

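For context on the last hunk: the deleted helper used json.dump's default= hook, which the encoder calls only for objects it can't serialize natively; the new shared write_object_with_serialized_datetimes utility presumably centralizes the same pattern. A standalone illustration of that pattern (dict contents invented):

```python
import json
from datetime import datetime, date

def json_serialize_datetime(obj):
    # json.dump calls this only for objects it can't serialize natively
    if isinstance(obj, (datetime, date)):
        return obj.isoformat()
    raise TypeError('Object {} (type {}) not serializable'.format(obj, type(obj)))

d = {'file': 'B04_R1/I__00122.JPG', 'datetime': datetime(2024, 6, 1, 12, 30)}
print(json.dumps(d, indent=1, default=json_serialize_datetime))
# ...prints the datetime as "2024-06-01T12:30:00"
```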
megadetector/data_management/rename_images.py

@@ -1,6 +1,6 @@
  """

- rename_images.py.py
+ rename_images.py

  Copies images from a possibly-nested folder structure to a flat folder structure, including EXIF
  timestamps in each filename. Loosely equivalent to camtrapR's imageRename() function.
6
6
  timestamps in each filename. Loosely equivalent to camtrapR's imageRename() function.