megadetector 5.0.24__py3-none-any.whl → 5.0.26__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only.


Files changed (41)
  1. megadetector/data_management/cct_json_utils.py +15 -2
  2. megadetector/data_management/coco_to_yolo.py +53 -31
  3. megadetector/data_management/databases/combine_coco_camera_traps_files.py +7 -3
  4. megadetector/data_management/databases/integrity_check_json_db.py +2 -2
  5. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +73 -69
  6. megadetector/data_management/lila/add_locations_to_nacti.py +114 -110
  7. megadetector/data_management/lila/generate_lila_per_image_labels.py +2 -2
  8. megadetector/data_management/lila/test_lila_metadata_urls.py +21 -10
  9. megadetector/data_management/remap_coco_categories.py +60 -11
  10. megadetector/data_management/{wi_to_md.py → speciesnet_to_md.py} +2 -2
  11. megadetector/data_management/yolo_to_coco.py +45 -15
  12. megadetector/detection/run_detector.py +1 -0
  13. megadetector/detection/run_detector_batch.py +5 -4
  14. megadetector/postprocessing/classification_postprocessing.py +788 -524
  15. megadetector/postprocessing/compare_batch_results.py +176 -9
  16. megadetector/postprocessing/create_crop_folder.py +420 -0
  17. megadetector/postprocessing/load_api_results.py +4 -1
  18. megadetector/postprocessing/md_to_coco.py +1 -1
  19. megadetector/postprocessing/postprocess_batch_results.py +158 -44
  20. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +3 -8
  21. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +2 -2
  22. megadetector/postprocessing/separate_detections_into_folders.py +20 -4
  23. megadetector/postprocessing/subset_json_detector_output.py +180 -15
  24. megadetector/postprocessing/validate_batch_results.py +13 -5
  25. megadetector/taxonomy_mapping/map_new_lila_datasets.py +6 -6
  26. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +3 -58
  27. megadetector/taxonomy_mapping/species_lookup.py +45 -2
  28. megadetector/utils/ct_utils.py +76 -3
  29. megadetector/utils/directory_listing.py +4 -4
  30. megadetector/utils/gpu_test.py +21 -3
  31. megadetector/utils/md_tests.py +142 -49
  32. megadetector/utils/path_utils.py +342 -19
  33. megadetector/utils/wi_utils.py +1286 -212
  34. megadetector/visualization/visualization_utils.py +16 -4
  35. megadetector/visualization/visualize_db.py +1 -1
  36. megadetector/visualization/visualize_detector_output.py +1 -4
  37. {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info}/METADATA +6 -3
  38. {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info}/RECORD +41 -40
  39. {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info}/WHEEL +1 -1
  40. {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info/licenses}/LICENSE +0 -0
  41. {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info}/top_level.txt +0 -0
megadetector/data_management/lila/test_lila_metadata_urls.py

@@ -31,9 +31,10 @@ os.makedirs(metadata_dir,exist_ok=True)
 md_results_dir = os.path.join(lila_local_base,'md_results')
 os.makedirs(md_results_dir,exist_ok=True)
 
-md_results_keys = ['mdv4_results_raw','mdv5a_results_raw','mdv5b_results_raw','md_results_with_rde']
+md_results_keys = ['mdv4_results_raw','mdv5a_results_raw','mdv5b_results_raw',
+                   'md1000-redwood_results_raw','md_results_with_rde']
 
-preferred_cloud = 'gcp' # 'azure', 'aws'
+preferred_cloud = None # 'gcp' # 'azure', 'aws'
 
 force_download = True
 
@@ -52,7 +53,7 @@ print('Loaded metadata URLs for {} datasets'.format(len(metadata_table)))
 
 #%% Download and extract metadata and MD results for each dataset
 
-# Takes ~60 seconds if everything needs to beo downloaded and unzipped
+# Takes ~60 seconds if everything needs to be downloaded and unzipped
 
 for ds_name in metadata_table.keys():
 
@@ -88,6 +89,8 @@ url_to_source = {}
 # so we pick a semi-arbitrary image that isn't the first. How about the 2000th?
 image_index = 2000
 
+# TODO: parallelize this loop
+#
 # ds_name = list(metadata_table.keys())[0]
 for ds_name in metadata_table.keys():
 
@@ -101,13 +104,21 @@ for ds_name in metadata_table.keys():
     with open(json_filename, 'r') as f:
         data = json.load(f)
 
-    image_base_url = metadata_table[ds_name]['image_base_url_' + preferred_cloud]
-    assert not image_base_url.endswith('/')
-    # Download a test image
-    test_image_relative_path = data['images'][image_index]['file_name']
-    test_image_url = image_base_url + '/' + test_image_relative_path
-
-    url_to_source[test_image_url] = ds_name + ' metadata'
+    if preferred_cloud is not None:
+        clouds = [preferred_cloud]
+    else:
+        clouds = ['gcp','aws','azure']
+
+    for cloud in clouds:
+
+        image_base_url = metadata_table[ds_name]['image_base_url_' + cloud]
+        assert not image_base_url.endswith('/')
+
+        # Download a test image
+        test_image_relative_path = data['images'][image_index]['file_name']
+        test_image_url = image_base_url + '/' + test_image_relative_path
+
+        url_to_source[test_image_url] = ds_name + ' metadata ({})'.format(cloud)
 
     # Grab an image from the MegaDetector results
 
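The hunk above changes the URL tester from probing a single preferred cloud to probing every cloud when preferred_cloud is None. A minimal, self-contained sketch of the new selection logic; the dataset name and base URLs below are hypothetical stand-ins for real LILA metadata entries:

    # Sketch of the cloud-fallback logic introduced above; 'example-dataset'
    # and its URLs are made up for illustration.
    metadata_table = {
        'example-dataset': {
            'image_base_url_gcp': 'https://storage.googleapis.com/example',
            'image_base_url_aws': 'https://example.s3.amazonaws.com',
            'image_base_url_azure': 'https://example.blob.core.windows.net'
        }
    }

    preferred_cloud = None  # None now means "test all three clouds"
    clouds = [preferred_cloud] if preferred_cloud is not None else ['gcp', 'aws', 'azure']

    url_to_source = {}
    for ds_name, ds_info in metadata_table.items():
        for cloud in clouds:
            image_base_url = ds_info['image_base_url_' + cloud]
            assert not image_base_url.endswith('/')
            test_image_url = image_base_url + '/' + 'path/to/test_image.jpg'
            url_to_source[test_image_url] = ds_name + ' metadata ({})'.format(cloud)

    # One test URL per (dataset, cloud) pair
    print('\n'.join(sorted(url_to_source.values())))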
megadetector/data_management/remap_coco_categories.py

@@ -12,6 +12,7 @@ import os
 import json
 
 from copy import deepcopy
+from megadetector.utils.ct_utils import invert_dictionary
 
 
 #%% Main function
@@ -19,17 +20,27 @@ from copy import deepcopy
 def remap_coco_categories(input_data,
                           output_category_name_to_id,
                           input_category_name_to_output_category_name,
-                          output_file=None):
+                          output_file=None,
+                          allow_unused_categories=False):
     """
     Given a COCO-formatted dataset, remap the categories to a new categories mapping, optionally
     writing the results to a new file.
 
-    output_category_name_to_id is a dict mapping strings to ints.
+    Args:
+        input_data (str or dict): a COCO-formatted dict or a filename. If it's a dict, it will
+            be copied, not modified in place.
+        output_category_name_to_id (dict) a dict mapping strings to ints. Categories not in
+            this dict will be ignored or will result in errors, depending on allow_unused_categories.
+        input_category_name_to_output_category_name: a dict mapping strings to strings.
+            Annotations using categories not in this dict will be omitted or will result in
+            errors, depending on allow_unused_categories.
+        output_file (str, optional): output file to which we should write remapped COCO data
+        allow_unused_categories (bool, optional): should we ignore categories not present in the
+            input/output mappings? If this is False and we encounter an unmapped category, we'll
+            error.
 
-    input_category_name_to_output_category_name is a dict mapping strings to strings.
-
-    [input_data] can be a COCO-formatted dict or a filename. If it's a dict, it will be copied,
-    not modified in place.
+    Returns:
+        dict: COCO-formatted dict
     """
 
     if isinstance(input_data,str):
@@ -48,23 +59,59 @@ def remap_coco_categories(input_data,
     input_category_name_to_input_category_id = {}
     for c in input_data['categories']:
         input_category_name_to_input_category_id[c['name']] = c['id']
-
+    input_category_id_to_input_category_name = \
+        invert_dictionary(input_category_name_to_input_category_id)
+
     # Map input IDs --> output IDs
     input_category_id_to_output_category_id = {}
-    for input_name in input_category_name_to_output_category_name.keys():
+    input_category_names = list(input_category_name_to_output_category_name.keys())
+
+    # input_name = input_category_names[0]
+    for input_name in input_category_names:
+
         output_name = input_category_name_to_output_category_name[input_name]
         assert output_name in output_category_name_to_id, \
             'No output ID for {} --> {}'.format(input_name,output_name)
         input_id = input_category_name_to_input_category_id[input_name]
        output_id = output_category_name_to_id[output_name]
        input_category_id_to_output_category_id[input_id] = output_id
+
+    # ...for each category we want to keep
 
+    printed_unused_category_warnings = set()
+
+    valid_annotations = []
+
     # Map annotations
     for ann in output_data['annotations']:
-        assert ann['category_id'] in input_category_id_to_output_category_id, \
-            'Unrecognized category ID {}'.format(ann['category_id'])
-        ann['category_id'] = input_category_id_to_output_category_id[ann['category_id']]
 
+        input_category_id = ann['category_id']
+        if input_category_id not in input_category_id_to_output_category_id:
+            if allow_unused_categories:
+                if input_category_id not in printed_unused_category_warnings:
+                    printed_unused_category_warnings.add(input_category_id)
+                    input_category_name = \
+                        input_category_id_to_input_category_name[input_category_id]
+                    s = 'Skipping unmapped category ID {} ({})'.format(
+                        input_category_id,input_category_name)
+                    print(s)
+                continue
+            else:
+                s = 'Unmapped category ID {}'.format(input_category_id)
+                raise ValueError(s)
+        output_category_id = input_category_id_to_output_category_id[input_category_id]
+        ann['category_id'] = output_category_id
+        valid_annotations.append(ann)
+
+    # ...for each annotation
+
+    # The only reason annotations should get excluded is the case where we allow
+    # unused categories
+    if not allow_unused_categories:
+        assert len(valid_annotations) == len(output_data['annotations'])
+
+    output_data['annotations'] = valid_annotations
+
     # Update the category list
     output_categories = []
     for output_name in output_category_name_to_id:
@@ -78,6 +125,8 @@ def remap_coco_categories(input_data,
 
     return input_data
 
+# ...def remap_coco_categories(...)
+
 
 #%% Command-line driver
 
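To illustrate the new allow_unused_categories behavior, here is a hedged usage sketch based on the signature shown above; the file names and category mappings are hypothetical:

    from megadetector.data_management.remap_coco_categories import remap_coco_categories

    # Hypothetical mapping: collapse 'deer' and 'elk' onto a single 'animal'
    # category. With allow_unused_categories=True, annotations whose category
    # isn't listed here are skipped (with a one-time warning per category)
    # rather than raising a ValueError.
    output_category_name_to_id = {'animal': 0}
    input_to_output_names = {'deer': 'animal', 'elk': 'animal'}

    remapped = remap_coco_categories('input_coco.json',
                                     output_category_name_to_id,
                                     input_to_output_names,
                                     output_file='remapped_coco.json',
                                     allow_unused_categories=True)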
megadetector/data_management/speciesnet_to_md.py (renamed from wi_to_md.py)

@@ -2,7 +2,7 @@
 
 wi_to_md.py
 
-Converts the WI predictions.json format to MD .json format. This is just a
+Converts the WI (SpeciesNet) predictions.json format to MD .json format. This is just a
 command-line wrapper around utils.wi_utils.generate_md_results_from_predictions_json.
 
 """
@@ -20,7 +20,7 @@ def main():
 
     parser = argparse.ArgumentParser()
     parser.add_argument('predictions_json_file', action='store', type=str,
-                        help='.json file to convert from predictions.json format to MD format')
+                        help='.json file to convert from SpeciesNet predictions.json format to MD format')
     parser.add_argument('md_results_file', action='store', type=str,
                         help='output file to write in MD format')
     parser.add_argument('--base_folder', action='store', type=str, default=None,
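Apart from the rename, usage appears unchanged; based on the positional arguments and --base_folder flag shown in this hunk, an invocation would look something like the following (file names and paths are placeholders):

    python -m megadetector.data_management.speciesnet_to_md \
        predictions.json md_results.json --base_folder /path/to/images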
megadetector/data_management/yolo_to_coco.py

@@ -34,7 +34,7 @@ def _filename_to_image_id(fn):
     return fn.replace(' ','_').replace('\\','/')
 
 
-def _process_image(fn_abs,input_folder,category_id_to_name):
+def _process_image(fn_abs,input_folder,category_id_to_name,label_folder):
     """
     Internal support function for processing one image's labels.
     """
@@ -42,8 +42,8 @@ def _process_image(fn_abs,input_folder,category_id_to_name):
     # Create the image object for this image
     #
     # Always use forward slashes in image filenames and IDs
-    fn_relative = os.path.relpath(fn_abs,input_folder).replace('\\','/')
-    image_id = _filename_to_image_id(fn_relative)
+    image_fn_relative = os.path.relpath(fn_abs,input_folder).replace('\\','/')
+    image_id = _filename_to_image_id(image_fn_relative)
 
     # This is done in a separate loop now
     #
@@ -53,7 +53,7 @@ def _process_image(fn_abs,input_folder,category_id_to_name):
     # image_ids.add(image_id)
 
     im = {}
-    im['file_name'] = fn_relative
+    im['file_name'] = image_fn_relative
     im['id'] = image_id
 
     annotations_this_image = []
@@ -65,14 +65,20 @@ def _process_image(fn_abs,input_folder,category_id_to_name):
         im['height'] = im_height
         im['error'] = None
     except Exception as e:
-        print('Warning: error reading {}:\n{}'.format(fn_relative,str(e)))
+        print('Warning: error reading {}:\n{}'.format(image_fn_relative,str(e)))
         im['width'] = -1
         im['height'] = -1
         im['error'] = str(e)
         return (im,annotations_this_image)
 
     # Is there an annotation file for this image?
-    annotation_file = os.path.splitext(fn_abs)[0] + '.txt'
+    if label_folder is not None:
+        assert input_folder in fn_abs
+        label_file_abs_base = fn_abs.replace(input_folder,label_folder)
+    else:
+        label_file_abs_base = fn_abs
+
+    annotation_file = os.path.splitext(label_file_abs_base)[0] + '.txt'
     if not os.path.isfile(annotation_file):
         annotation_file = os.path.splitext(fn_abs)[0] + '.TXT'
 
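The path substitution above (mapping an image path into a separate label folder before swapping the extension) is the core of the new label_folder support. A standalone sketch of the same mapping; the helper name is hypothetical, not part of yolo_to_coco.py:

    import os

    def label_path_for_image(image_path, image_folder, label_folder=None):
        # Mirror the substitution in _process_image: map the image path into
        # the label folder (if one is given), then swap the extension for .txt.
        if label_folder is not None:
            assert image_folder in image_path
            base = image_path.replace(image_folder, label_folder)
        else:
            base = image_path
        return os.path.splitext(base)[0] + '.txt'

    assert label_path_for_image('/data/images/site1/img001.jpg',
                                '/data/images',
                                '/data/labels') == '/data/labels/site1/img001.txt'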
@@ -270,9 +276,14 @@ def validate_label_file(label_file,category_id_to_name=None,verbose=False):
 # ...def validate_label_file(...)
 
 
-def validate_yolo_dataset(input_folder, class_name_file, n_workers=1, pool_type='thread', verbose=False):
+def validate_yolo_dataset(input_folder,
+                          class_name_file,
+                          n_workers=1,
+                          pool_type='thread',
+                          verbose=False):
     """
-    Verifies all the labels in a YOLO dataset folder.
+    Verifies all the labels in a YOLO dataset folder. Does not yet support the case where the
+    labels and images are in different folders (yolo_to_coco() supports this).
 
     Looks for:
 
@@ -396,14 +407,17 @@ def yolo_to_coco(input_folder,
                  recursive=True,
                  exclude_string=None,
                  include_string=None,
-                 overwrite_handling='overwrite'):
+                 overwrite_handling='overwrite',
+                 label_folder=None):
     """
     Converts a YOLO-formatted dataset to a COCO-formatted dataset.
 
     All images will be assigned an "error" value, usually None.
 
     Args:
-        input_folder (str): the YOLO dataset folder to validate
+        input_folder (str): the YOLO dataset folder to convert. If the image and label
+            folders are different, this is the image folder, and [label_folder] is the
+            label folder.
         class_name_file (str or list): a list of classes, a flat text file, or a yolo
             dataset.yml/.yaml file. If it's a dataset.yml file, that file should point to
             input_folder as the base folder, though this is not explicitly checked.
@@ -432,6 +446,7 @@ def yolo_to_coco(input_folder,
         include_string (str, optional): include only images whose filename contains a string
         overwrite_handling (bool, optional): behavior if output_file exists ('load', 'overwrite', or
            'error')
+        label_folder (str, optional): label folder, if different from the image folder
 
     Returns:
         dict: COCO-formatted data, the same as what's written to [output_file]
@@ -439,6 +454,8 @@ def yolo_to_coco(input_folder,
 
     ## Validate input
 
+    input_folder = input_folder.replace('\\','/')
+
     assert os.path.isdir(input_folder)
     assert os.path.isfile(class_name_file)
 
@@ -487,6 +504,7 @@ def yolo_to_coco(input_folder,
     print('Enumerating images...')
 
     image_files_abs = find_images(input_folder,recursive=recursive,convert_slashes=True)
+    assert not any(['\\' in fn for fn in image_files_abs])
 
     n_files_original = len(image_files_abs)
 
@@ -516,8 +534,14 @@ def yolo_to_coco(input_folder,
 
     if not allow_images_without_label_files:
         print('Verifying that label files exist')
+        # image_file_abs = image_files_abs[0]
         for image_file_abs in tqdm(image_files_abs):
-            label_file_abs = os.path.splitext(image_file_abs)[0] + '.txt'
+            if label_folder is not None:
+                assert input_folder in image_file_abs
+                label_file_abs_base = image_file_abs.replace(input_folder,label_folder)
+            else:
+                label_file_abs_base = image_file_abs
+            label_file_abs = os.path.splitext(label_file_abs_base)[0] + '.txt'
             assert os.path.isfile(label_file_abs), \
                 'No annotation file for {}'.format(image_file_abs)
 
@@ -528,7 +552,7 @@ def yolo_to_coco(input_folder,
 
     for fn_abs in tqdm(image_files_abs):
 
-        fn_relative = os.path.relpath(fn_abs,input_folder)
+        fn_relative = os.path.relpath(fn_abs,input_folder).replace('\\','/')
         image_id = _filename_to_image_id(fn_relative)
         assert image_id not in image_ids, \
             'Oops, you have hit a very esoteric case where you have the same filename ' + \
@@ -543,8 +567,12 @@ def yolo_to_coco(input_folder,
     if n_workers <= 1:
 
         image_results = []
+        # fn_abs = image_files_abs[0]
        for fn_abs in tqdm(image_files_abs):
-            image_results.append(_process_image(fn_abs,input_folder,category_id_to_name))
+            image_results.append(_process_image(fn_abs,
+                                                input_folder,
+                                                category_id_to_name,
+                                                label_folder))
 
     else:
 
@@ -557,8 +585,10 @@ def yolo_to_coco(input_folder,
 
         print('Starting a {} pool of {} workers'.format(pool_type,n_workers))
 
-        p = partial(_process_image,input_folder=input_folder,
-                    category_id_to_name=category_id_to_name)
+        p = partial(_process_image,
+                    input_folder=input_folder,
+                    category_id_to_name=category_id_to_name,
+                    label_folder=label_folder)
         image_results = list(tqdm(pool.imap(p, image_files_abs),
                                   total=len(image_files_abs)))
 
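Putting the new parameter together, a hedged usage sketch based on the signature above; the paths are hypothetical, and class_list.txt stands in for a flat text file with one class name per line:

    from megadetector.data_management.yolo_to_coco import yolo_to_coco

    # Images live under /data/yolo/images, labels under a parallel tree
    # /data/yolo/labels; per the docstring above, label_folder handles
    # exactly this split layout.
    coco_data = yolo_to_coco('/data/yolo/images',
                             '/data/yolo/class_list.txt',
                             output_file='/data/yolo/coco.json',
                             label_folder='/data/yolo/labels')
    print('Converted {} images'.format(len(coco_data['images'])))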
megadetector/detection/run_detector.py

@@ -96,6 +96,7 @@ model_string_to_model_version = {
     'cedar':'v1000.0.0-cedar',
     'larch':'v1000.0.0-larch',
     'default':'v5a.0.0',
+    'default-model':'v5a.0.0',
     'megadetector':'v5a.0.0'
 }
 
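This hunk adds 'default-model' as another alias for MDv5a. Assuming the table stays importable at module scope (which the hunk suggests, since it's a module-level dict literal), the new alias can be sanity-checked like this:

    from megadetector.detection.run_detector import model_string_to_model_version

    # 'default-model' joins 'default' and 'megadetector' as aliases for MDv5a
    assert model_string_to_model_version['default-model'] == 'v5a.0.0'
    assert model_string_to_model_version['default'] == \
        model_string_to_model_version['megadetector']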
megadetector/detection/run_detector_batch.py

@@ -735,7 +735,7 @@ def load_and_run_detector_batch(model_file,
     """
 
     # Validate input arguments
-    if n_cores is None:
+    if n_cores is None or n_cores <= 0:
         n_cores = 1
 
     if confidence_threshold is None:
@@ -1331,13 +1331,14 @@ def main():
     parser.add_argument(
         '--ncores',
         type=int,
-        default=0,
-        help='Number of cores to use for inference; only applies to CPU-based inference')
+        default=1,
+        help='Number of cores to use for inference; only applies to CPU-based inference (default 1)')
     parser.add_argument(
         '--loader_workers',
         type=int,
         default=default_loaders,
-        help='Number of image loader workers to use; only relevant when --use_image_queue is set')
+        help='Number of image loader workers to use; only relevant when --use_image_queue ' + \
+             'is set (default {})'.format(default_loaders))
     parser.add_argument(
         '--class_mapping_filename',
         type=str,
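With these changes, omitting --ncores now means single-core CPU inference, and load_and_run_detector_batch normalizes any non-positive n_cores value to 1. An illustrative invocation; the --ncores flag is taken from the hunk above, while the model string and positional arguments (detector, input folder, output file) are assumptions based on the module's typical usage, with placeholder paths:

    python -m megadetector.detection.run_detector_batch \
        MDV5A /path/to/images md_results.json --ncores 4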