megadetector 5.0.15__py3-none-any.whl → 5.0.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (34)
  1. megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +387 -0
  2. megadetector/data_management/importers/snapshot_safari_importer_reprise.py +28 -16
  3. megadetector/data_management/lila/generate_lila_per_image_labels.py +3 -3
  4. megadetector/data_management/lila/test_lila_metadata_urls.py +2 -2
  5. megadetector/data_management/remove_exif.py +61 -36
  6. megadetector/data_management/yolo_to_coco.py +25 -6
  7. megadetector/detection/process_video.py +270 -127
  8. megadetector/detection/pytorch_detector.py +13 -11
  9. megadetector/detection/run_detector.py +9 -2
  10. megadetector/detection/run_detector_batch.py +8 -1
  11. megadetector/detection/run_inference_with_yolov5_val.py +58 -10
  12. megadetector/detection/tf_detector.py +8 -2
  13. megadetector/detection/video_utils.py +214 -18
  14. megadetector/postprocessing/md_to_coco.py +31 -9
  15. megadetector/postprocessing/postprocess_batch_results.py +23 -7
  16. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +5 -2
  17. megadetector/postprocessing/subset_json_detector_output.py +22 -12
  18. megadetector/taxonomy_mapping/map_new_lila_datasets.py +3 -3
  19. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +2 -1
  20. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +1 -1
  21. megadetector/taxonomy_mapping/simple_image_download.py +5 -0
  22. megadetector/taxonomy_mapping/species_lookup.py +1 -1
  23. megadetector/utils/ct_utils.py +48 -0
  24. megadetector/utils/md_tests.py +231 -56
  25. megadetector/utils/path_utils.py +2 -2
  26. megadetector/utils/torch_test.py +32 -0
  27. megadetector/utils/url_utils.py +101 -4
  28. megadetector/visualization/visualization_utils.py +21 -6
  29. megadetector/visualization/visualize_db.py +16 -0
  30. {megadetector-5.0.15.dist-info → megadetector-5.0.17.dist-info}/LICENSE +0 -0
  31. {megadetector-5.0.15.dist-info → megadetector-5.0.17.dist-info}/METADATA +5 -7
  32. {megadetector-5.0.15.dist-info → megadetector-5.0.17.dist-info}/RECORD +34 -32
  33. {megadetector-5.0.15.dist-info → megadetector-5.0.17.dist-info}/WHEEL +1 -1
  34. {megadetector-5.0.15.dist-info → megadetector-5.0.17.dist-info}/top_level.txt +0 -0
megadetector/postprocessing/md_to_coco.py

@@ -31,11 +31,18 @@ def md_to_coco(md_results_file,
                validate_image_sizes=False,
                info=None,
                preserve_nonstandard_metadata=True,
-               include_failed_images=True):
+               include_failed_images=True,
+               include_annotations_without_bounding_boxes=True,
+               empty_category_id='0'):
     """
     "Converts" MegaDetector output files to COCO format. "Converts" is in quotes because
     this is an opinionated transformation that requires a confidence threshold.
 
+    The default confidence threshold is not 0; the assumption is that by default, you are
+    going to treat the resulting COCO file as a set of labels. If you are using the resulting COCO
+    file to evaluate a detector, you likely want a default confidence threshold of 0. Confidence
+    values will be written to the semi-standard "score" field for each image
+
     A folder of images is required if width and height information are not available
     in the MD results file.
 
@@ -54,8 +61,13 @@ def md_to_coco(md_results_file,
         preserve_nonstandard_metadata (bool, optional): if this is True, confidence will be preserved in a
             non-standard "conf" field in each annotation, and any random fields present in each image's data
             (e.g. EXIF metadata) will be propagated to COCO output
-        include_failed_images (boo, optional): if this is True, failed images will be propagated to COCO output
+        include_failed_images (bool, optional): if this is True, failed images will be propagated to COCO output
             with a non-empty "failure" field and no other fields, otherwise failed images will be skipped.
+        include_annotations_without_bounding_boxes (bool, optional): if this is True, annotations with
+            only class labels (no bounding boxes) will be included in the output. If this is False, empty
+            images will be represented with no annotations.
+        empty_category_id (str, optional): category ID reserved for the 'empty' class, should not be
+            attached to any bounding boxes
 
     Returns:
         dict: the COCO data dict, identical to what's written to [coco_output_file] if [coco_output_file]
@@ -67,6 +79,8 @@ def md_to_coco(md_results_file,
 
     coco_images = []
     coco_annotations = []
+
+    print('Converting MD results to COCO...')
 
     # im = md_results['images'][0]
     for im in tqdm(md_results['images']):
@@ -129,13 +143,13 @@ def md_to_coco(md_results_file,
             coco_category_id = int(md_category_id)
             ann['category_id'] = coco_category_id
 
-            # In very esoteric cases, we use the empty category (0) in MD-formatted output files
-            if md_category_id != '0':
+            if md_category_id != empty_category_id:
 
                 assert 'bbox' in detection,\
                     'Oops: non-empty category with no bbox in {}'.format(im['file'])
 
                 ann['bbox'] = detection['bbox']
+
                 # MegaDetector: [x,y,width,height] (normalized, origin upper-left)
                 # COCO: [x,y,width,height] (absolute, origin upper-left)
                 ann['bbox'][0] = ann['bbox'][0] * coco_im['width']
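For intuition, the normalized-to-absolute conversion that starts here (and continues in the next hunk) works as follows; a worked example with made-up image dimensions:

    # MD bbox: [x, y, width, height], normalized, origin upper-left
    md_bbox = [0.5, 0.25, 0.1, 0.2]
    image_width, image_height = 1000, 750

    # COCO bbox: [x, y, width, height], absolute pixels, origin upper-left
    coco_bbox = [md_bbox[0] * image_width,    # 500.0
                 md_bbox[1] * image_height,   # 187.5
                 md_bbox[2] * image_width,    # 100.0
                 md_bbox[3] * image_height]   # 150.0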
@@ -144,13 +158,19 @@ def md_to_coco(md_results_file,
                 ann['bbox'][3] = ann['bbox'][3] * coco_im['height']
 
             else:
-
-                print('Warning: empty category annotation in file {}'.format(im['file']))
+
+                # In very esoteric cases, we use the empty category (0) in MD-formatted output files
+                print('Warning: empty category ({}) used for annotation in file {}'.format(
+                    empty_category_id,im['file']))
+                pass
 
             if preserve_nonstandard_metadata:
-                ann['conf'] = detection['conf']
-
-            coco_annotations.append(ann)
+                # "Score" is a semi-standard string here, recognized by at least pycocotools
+                # ann['conf'] = detection['conf']
+                ann['score'] = detection['conf']
+
+            if 'bbox' in ann or include_annotations_without_bounding_boxes:
+                coco_annotations.append(ann)
 
         # ...for each detection
 
@@ -176,6 +196,8 @@ def md_to_coco(md_results_file,
             'name':md_results['detection_categories'][md_category_id]}
         output_dict['categories'].append(coco_category)
 
+    print('Writing COCO output file...')
+
     if coco_output_file is not None:
         with open(coco_output_file,'w') as f:
             json.dump(output_dict,f,indent=1)
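Taken together, these hunks change the md_to_coco call surface. A minimal usage sketch under stated assumptions: only the parameters visible in the diff above are confirmed, and the confidence-threshold parameter name in particular is a guess based on the docstring:

    from megadetector.postprocessing.md_to_coco import md_to_coco

    # To evaluate a detector (rather than treat the output as labels), use a
    # confidence threshold of 0 so all detections survive as annotations,
    # each carrying the semi-standard 'score' field.
    # [confidence_threshold] is an assumed parameter name.
    coco_dict = md_to_coco('md_results.json',
                           coco_output_file='md_results_coco.json',
                           confidence_threshold=0,
                           include_annotations_without_bounding_boxes=True,
                           empty_category_id='0')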
megadetector/postprocessing/postprocess_batch_results.py

@@ -770,7 +770,7 @@ def _render_image_no_gt(file_info,detection_categories_to_results_name,
             if det['conf'] > max_conf:
                 max_conf = det['conf']
 
-            if ('classifications' in det):
+            if ('classifications' in det) and (len(det['classifications']) > 0):
 
                 # This is a list of [class,confidence] pairs, sorted by confidence
                 classifications = det['classifications']
@@ -949,6 +949,13 @@ def process_batch_results(options):
           f'negative, {n_positive} positive, {n_unknown} unknown, '
           f'{n_ambiguous} ambiguous')
 
+    if n_positive == 0:
+        print('\n*** Warning: no positives found in ground truth, analysis won\'t be very meaningful ***\n')
+    if n_negative == 0:
+        print('\n*** Warning: no negatives found in ground truth, analysis won\'t be very meaningful ***\n')
+    if n_ambiguous > 0:
+        print('\n*** Warning: {} images with ambiguous positive/negative status found in ground truth ***\n'.format(
+            n_ambiguous))
 
     ##%% Load detection (and possibly classification) results
 
@@ -1095,25 +1102,34 @@ def process_batch_results(options):
 
     ##%% Detection evaluation: compute precision/recall
 
-    # numpy array of detection probabilities
+    # numpy array of maximum confidence values
     p_detection = detections_df['max_detection_conf'].values
-    n_detections = len(p_detection)
+    n_detection_values = len(p_detection)
 
     # numpy array of bools (0.0/1.0), and -1 as null value
-    gt_detections = np.zeros(n_detections, dtype=float)
+    gt_detections = np.zeros(n_detection_values, dtype=float)
 
+    n_positive = 0
+    n_negative = 0
+
     for i_detection, fn in enumerate(detector_files):
+
         image_id = ground_truth_indexed_db.filename_to_id[fn]
         image = ground_truth_indexed_db.image_id_to_image[image_id]
         detection_status = image['_detection_status']
 
         if detection_status == DetectionStatus.DS_NEGATIVE:
             gt_detections[i_detection] = 0.0
+            n_negative += 1
         elif detection_status == DetectionStatus.DS_POSITIVE:
             gt_detections[i_detection] = 1.0
+            n_positive += 1
         else:
             gt_detections[i_detection] = -1.0
 
+    print('Of {} ground truth values, found {} positives and {} negatives'.format(
+        len(detections_df),n_positive,n_negative))
+
     # Don't include ambiguous/unknown ground truth in precision/recall analysis
     b_valid_ground_truth = gt_detections >= 0.0
 
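To make the new bookkeeping concrete, here is an illustrative sketch (not code from the package) of how the valid-ground-truth mask feeds a precision/recall computation:

    import numpy as np

    p_detection = np.array([0.9, 0.2, 0.8, 0.6])     # max_detection_conf per image
    gt_detections = np.array([1.0, 0.0, -1.0, 1.0])  # -1.0 = ambiguous/unknown

    # Don't include ambiguous/unknown ground truth in precision/recall analysis
    b_valid_ground_truth = gt_detections >= 0.0
    p_valid = p_detection[b_valid_ground_truth]      # [0.9, 0.2, 0.6]
    gt_valid = gt_detections[b_valid_ground_truth]   # [1.0, 0.0, 1.0]

    # Precision/recall at an example threshold of 0.5
    predicted = p_valid >= 0.5
    tp = np.sum(predicted & (gt_valid == 1.0))       # 2
    precision = tp / np.sum(predicted)               # 1.0
    recall = tp / np.sum(gt_valid == 1.0)            # 1.0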
@@ -1187,13 +1203,13 @@ def process_batch_results(options):
     # Rows / first index is ground truth, columns / second index is predicted category
     classifier_cm = collections.defaultdict(lambda: collections.defaultdict(lambda: 0))
 
-    # iDetection = 0; fn = detector_files[iDetection]; print(fn)
+    # i_detection = 0; fn = detector_files[i_detection]; print(fn)
     assert len(detector_files) == len(detections_df)
-    for iDetection, fn in enumerate(detector_files):
+    for i_detection, fn in enumerate(detector_files):
 
         image_id = ground_truth_indexed_db.filename_to_id[fn]
         image = ground_truth_indexed_db.image_id_to_image[image_id]
-        detections = detections_df['detections'].iloc[iDetection]
+        detections = detections_df['detections'].iloc[i_detection]
         pred_class_ids = [det['classifications'][0][0] \
             for det in detections if 'classifications' in det.keys()]
         pred_classnames = [classification_categories[pd] for pd in pred_class_ids]
megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py

@@ -210,9 +210,12 @@ class RepeatDetectionOptions:
        #: a/b/c/RECONYX100 and a/b/c/RECONYX101 may really be the same camera).
        #:
        #: See ct_utils for a common replacement function that handles most common
-       #: manufacturer folder names.
+       #: manufacturer folder names:
+       #:
+       #: from megadetector.utils import ct_utils
+       #: self.customDirNameFunction = ct_utils.image_file_to_camera_folder
        self.customDirNameFunction = None
-
+
        #: Include only specific folders, mutually exclusive with [excludeFolders]
        self.includeFolders = None
 
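The docstring addition above shows how to plug in ct_utils.image_file_to_camera_folder; as a usage sketch (the collapsing behavior described in the comment is taken from the surrounding docstring, not verified here):

    from megadetector.postprocessing.repeat_detection_elimination.repeat_detections_core import \
        RepeatDetectionOptions
    from megadetector.utils import ct_utils

    options = RepeatDetectionOptions()

    # Map each image path to its camera folder, so that manufacturer
    # subfolders like a/b/c/RECONYX100 and a/b/c/RECONYX101 are treated
    # as one camera
    options.customDirNameFunction = ct_utils.image_file_to_camera_folder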
megadetector/postprocessing/subset_json_detector_output.py

@@ -124,7 +124,7 @@ class SubsetJsonDetectorOutputOptions:
         self.remove_failed_images = False
 
         #: Either a list of category IDs (as string-ints) (not names), or a dictionary mapping category *IDs*
-        #: (as string-ints) (not names) to thresholds.  Removes non-matching detections, does not
+        #: (as string-ints) (not names) to thresholds. Removes non-matching detections, does not
         #: remove images.  Not technically mutually exclusize with category_names_to_keep, but it's an esoteric
         #: scenario indeed where you would want to specify both.
         self.categories_to_keep = None
@@ -517,7 +517,7 @@ def subset_json_detector_output(input_filename, output_filename, options, data=N
     else:
 
         # Map images to unique folders
-        print('Finding unique folders')
+        print('Finding unique folders')
 
         folders_to_images = {}
 
@@ -670,16 +670,26 @@ def main():
     parser = argparse.ArgumentParser()
     parser.add_argument('input_file', type=str, help='Input .json filename')
     parser.add_argument('output_file', type=str, help='Output .json filename')
-    parser.add_argument('--query', type=str, default=None, help='Query string to search for (omitting this matches all)')
-    parser.add_argument('--replacement', type=str, default=None, help='Replace [query] with this')
-    parser.add_argument('--confidence_threshold', type=float, default=None, help='Remove detections below this confidence level')
-    parser.add_argument('--split_folders', action='store_true', help='Split .json files by leaf-node folder')
-    parser.add_argument('--split_folder_param', type=int, help='Directory level count for n_from_bottom and n_from_top splitting')
-    parser.add_argument('--split_folder_mode', type=str, help='Folder level to use for splitting ("top" or "bottom")')
-    parser.add_argument('--make_folder_relative', action='store_true', help='Make image paths relative to their containing folder (only meaningful with split_folders)')
-    parser.add_argument('--overwrite_json_files', action='store_true', help='Overwrite output files')
-    parser.add_argument('--copy_jsons_to_folders', action='store_true', help='When using split_folders and make_folder_relative, copy jsons to their corresponding folders (relative to output_file)')
-    parser.add_argument('--create_folders', action='store_true', help='When using copy_jsons_to_folders, create folders that don''t exist')
+    parser.add_argument('--query', type=str, default=None,
+                        help='Query string to search for (omitting this matches all)')
+    parser.add_argument('--replacement', type=str, default=None,
+                        help='Replace [query] with this')
+    parser.add_argument('--confidence_threshold', type=float, default=None,
+                        help='Remove detections below this confidence level')
+    parser.add_argument('--split_folders', action='store_true',
+                        help='Split .json files by leaf-node folder')
+    parser.add_argument('--split_folder_param', type=int,
+                        help='Directory level count for n_from_bottom and n_from_top splitting')
+    parser.add_argument('--split_folder_mode', type=str,
+                        help='Folder level to use for splitting ("top" or "bottom")')
+    parser.add_argument('--make_folder_relative', action='store_true',
+                        help='Make image paths relative to their containing folder (only meaningful with split_folders)')
+    parser.add_argument('--overwrite_json_files', action='store_true',
+                        help='Overwrite output files')
+    parser.add_argument('--copy_jsons_to_folders', action='store_true',
+                        help='When using split_folders and make_folder_relative, copy jsons to their corresponding folders (relative to output_file)')
+    parser.add_argument('--create_folders', action='store_true',
+                        help='When using copy_jsons_to_folders, create folders that don''t exist')
 
     if len(sys.argv[1:]) == 0:
         parser.print_help()
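The argparse hunk above is purely a reflow (each add_argument call split across two lines), so the CLI is unchanged; an invocation using these flags might look like this (file names are made up):

    python -m megadetector.postprocessing.subset_json_detector_output \
        md_results.json camera_a_results.json \
        --query "camera_a/" \
        --confidence_threshold 0.1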
megadetector/taxonomy_mapping/map_new_lila_datasets.py

@@ -15,10 +15,10 @@ import json
 # Created by get_lila_category_list.py
 input_lila_category_list_file = os.path.expanduser('~/lila/lila_categories_list/lila_dataset_to_categories.json')
 
-output_file = os.path.expanduser('~/lila/lila_additions_2023.12.29.csv')
+output_file = os.path.expanduser('~/lila/lila_additions_2024.07.16.csv')
 
 datasets_to_map = [
-    'Trail Camera Images of New Zealand Animals'
+    'Desert Lion Conservation Camera Traps'
     ]
 
 
@@ -133,7 +133,7 @@ if False:
     # q = 'white-throated monkey'
     # q = 'cingulata'
     # q = 'notamacropus'
-    q = 'porzana'
+    q = 'aves'
     taxonomy_preference = 'inat'
     m = get_preferred_taxonomic_match(q,taxonomy_preference)
     # print(m.scientific_name); import clipboard; clipboard.copy(m.scientific_name)
megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py

@@ -24,7 +24,7 @@ if False:
     release_taxonomy_file = os.path.expanduser('~/lila/lila-taxonomy-mapping_release.csv')
     # import clipboard; clipboard.copy(release_taxonomy_file)
 
-    # Created by get_lila_category_list.py... contains counts for each category
+    # Created by get_lila_annotation_counts.py... contains counts for each category
     lila_dataset_to_categories_file = os.path.expanduser('~/lila/lila_categories_list/lila_dataset_to_categories.json')
 
     assert os.path.isfile(lila_dataset_to_categories_file)
@@ -140,3 +140,4 @@ if False:
 
     print('Wrote final output to {}'.format(release_taxonomy_file))
 
+# ...if False
megadetector/taxonomy_mapping/preview_lila_taxonomy.py

@@ -16,7 +16,7 @@ import os
 import pandas as pd
 
 # lila_taxonomy_file = r"c:\git\agentmorrisprivate\lila-taxonomy\lila-taxonomy-mapping.csv"
-lila_taxonomy_file = os.path.expanduser('~/lila/lila_additions_2023.12.29.csv')
+lila_taxonomy_file = os.path.expanduser('~/lila/lila_additions_2024.07.16.csv')
 
 preview_base = os.path.expanduser('~/lila/lila_taxonomy_preview')
 os.makedirs(preview_base,exist_ok=True)
megadetector/taxonomy_mapping/simple_image_download.py

@@ -8,6 +8,11 @@ Slightly modified from:
 
     https://github.com/RiddlerQ/simple_image_download
 
+    pip install python-magic
+
+    # On Windows, also run:
+    pip install python-magic-bin
+
 """
 
 #%% Imports
megadetector/taxonomy_mapping/species_lookup.py

@@ -208,7 +208,7 @@ def initialize_taxonomy_lookup(force_init=False) -> None:
     # Load GBIF taxonomy
     gbif_taxonomy_file = os.path.join(taxonomy_download_dir, 'GBIF', 'Taxon.tsv')
     print('Loading GBIF taxonomy from {}'.format(gbif_taxonomy_file))
-    gbif_taxonomy = pd.read_csv(gbif_taxonomy_file, sep='\t')
+    gbif_taxonomy = pd.read_csv(gbif_taxonomy_file, sep='\t', encoding='utf-8',on_bad_lines='warn')
     gbif_taxonomy['scientificName'] = gbif_taxonomy['scientificName'].fillna('').str.strip()
     gbif_taxonomy['canonicalName'] = gbif_taxonomy['canonicalName'].fillna('').str.strip()
 
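Worth noting: on_bad_lines is the pandas 1.3+ replacement for the deprecated error_bad_lines/warn_bad_lines flags, so this change implicitly assumes pandas >= 1.3; 'warn' emits a ParserWarning and skips each malformed row rather than raising.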
megadetector/utils/ct_utils.py

@@ -16,6 +16,8 @@ import os
 import jsonpickle
 import numpy as np
 
+from operator import itemgetter
+
 # List of file extensions we'll consider images; comparisons will be case-insensitive
 # (i.e., no need to include both .jpg and .JPG on this list).
 image_extensions = ['.jpg', '.jpeg', '.gif', '.png']
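The new itemgetter import supports the classification sorting added below; for reference, itemgetter(1) keys a sort on the second element of each pair:

    from operator import itemgetter

    # Sort (class, confidence) pairs by confidence, descending
    sorted([('3', 0.2), ('1', 0.9)], key=itemgetter(1), reverse=True)
    # [('1', 0.9), ('3', 0.2)]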
@@ -294,6 +296,29 @@ def get_max_conf(im):
     return max_conf
 
 
+def sort_results_for_image(im):
+    """
+    Sort classification and detection results in descending order by confidence (in place).
+
+    Args:
+        im (dict): image dictionary in the MD output format (with a 'detections' field)
+    """
+    if 'detections' not in im or im['detections'] is None:
+        return
+
+    # Sort detections in descending order by confidence
+    im['detections'] = sort_list_of_dicts_by_key(im['detections'],k='conf',reverse=True)
+
+    for det in im['detections']:
+
+        # Sort classifications (which are (class,conf) tuples) in descending order by confidence
+        if 'classifications' in det and \
+           (det['classifications'] is not None) and \
+           (len(det['classifications']) > 0):
+            L = det['classifications']
+            det['classifications'] = sorted(L,key=itemgetter(1),reverse=True)
+
+
 def point_dist(p1,p2):
     """
     Computes the distance between two points, represented as length-two tuples.
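A quick usage sketch for the new helper, on a hand-made MD-format image record (all values invented):

    from megadetector.utils.ct_utils import sort_results_for_image

    im = {'file': 'camera_a/IMG0001.JPG',
          'detections': [
              {'category': '1', 'conf': 0.3,
               'classifications': [['2', 0.1], ['5', 0.8]]},
              {'category': '2', 'conf': 0.9}
          ]}

    sort_results_for_image(im)

    # im['detections'] is now sorted by 'conf' (the 0.9 detection first), and
    # the classification list is sorted by its confidence element (['5', 0.8] first)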
@@ -406,6 +431,21 @@ def split_list_into_n_chunks(L, n, chunk_strategy='greedy'):
         raise ValueError('Invalid chunk strategy: {}'.format(chunk_strategy))
 
 
+def sort_list_of_dicts_by_key(L,k,reverse=False):
+    """
+    Sorts the list of dictionaries [L] by the key [k].
+
+    Args:
+        L (list): list of dictionaries to sort
+        k (object, typically str): the sort key
+        reverse (bool, optional): whether to sort in reverse (descending) order
+
+    Returns:
+        dict: sorted copy of [d]
+    """
+    return sorted(L, key=lambda d: d[k], reverse=reverse)
+
+
 def sort_dictionary_by_key(d,reverse=False):
     """
     Sorts the dictionary [d] by key.
@@ -611,3 +651,11 @@ if False:
     r1 = [0.4,0.8,10,22]; r2 = [100, 101, 200, 210.4]; assert abs(rect_distance(r1,r2)-119.753) < 0.001
     r1 = [0.4,0.8,10,22]; r2 = [101, 101, 200, 210.4]; assert abs(rect_distance(r1,r2)-120.507) < 0.001
     r1 = [0.4,0.8,10,22]; r2 = [120, 120, 200, 210.4]; assert abs(rect_distance(r1,r2)-147.323) < 0.001
+
+
+    #%% Test dictionary sorting
+
+    L = [{'a':5},{'a':0},{'a':10}]
+    k = 'a'
+    sort_list_of_dicts_by_key(L, k, reverse=True)
+
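Note that sort_list_of_dicts_by_key returns a sorted copy rather than sorting in place, so the smoke test above discards its result; the returned value for this input would be [{'a': 10}, {'a': 5}, {'a': 0}], with L itself unchanged.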