megadetector 5.0.23__py3-none-any.whl → 5.0.25__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (42) hide show
  1. megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +2 -3
  2. megadetector/classification/merge_classification_detection_output.py +2 -2
  3. megadetector/data_management/coco_to_labelme.py +2 -1
  4. megadetector/data_management/databases/integrity_check_json_db.py +15 -14
  5. megadetector/data_management/databases/subset_json_db.py +49 -21
  6. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +73 -69
  7. megadetector/data_management/lila/add_locations_to_nacti.py +114 -110
  8. megadetector/data_management/mewc_to_md.py +340 -0
  9. megadetector/data_management/speciesnet_to_md.py +41 -0
  10. megadetector/data_management/yolo_output_to_md_output.py +15 -8
  11. megadetector/detection/process_video.py +24 -7
  12. megadetector/detection/pytorch_detector.py +841 -160
  13. megadetector/detection/run_detector.py +341 -146
  14. megadetector/detection/run_detector_batch.py +307 -70
  15. megadetector/detection/run_inference_with_yolov5_val.py +61 -4
  16. megadetector/detection/tf_detector.py +6 -1
  17. megadetector/postprocessing/{combine_api_outputs.py → combine_batch_outputs.py} +10 -13
  18. megadetector/postprocessing/compare_batch_results.py +236 -7
  19. megadetector/postprocessing/create_crop_folder.py +358 -0
  20. megadetector/postprocessing/md_to_labelme.py +7 -7
  21. megadetector/postprocessing/md_to_wi.py +40 -0
  22. megadetector/postprocessing/merge_detections.py +1 -1
  23. megadetector/postprocessing/postprocess_batch_results.py +12 -5
  24. megadetector/postprocessing/separate_detections_into_folders.py +32 -4
  25. megadetector/postprocessing/validate_batch_results.py +9 -4
  26. megadetector/utils/ct_utils.py +236 -45
  27. megadetector/utils/directory_listing.py +3 -3
  28. megadetector/utils/gpu_test.py +125 -0
  29. megadetector/utils/md_tests.py +455 -116
  30. megadetector/utils/path_utils.py +43 -2
  31. megadetector/utils/wi_utils.py +2691 -0
  32. megadetector/visualization/visualization_utils.py +95 -18
  33. megadetector/visualization/visualize_db.py +25 -7
  34. megadetector/visualization/visualize_detector_output.py +60 -13
  35. {megadetector-5.0.23.dist-info → megadetector-5.0.25.dist-info}/METADATA +11 -23
  36. {megadetector-5.0.23.dist-info → megadetector-5.0.25.dist-info}/RECORD +39 -36
  37. {megadetector-5.0.23.dist-info → megadetector-5.0.25.dist-info}/WHEEL +1 -1
  38. megadetector/detection/detector_training/__init__.py +0 -0
  39. megadetector/detection/detector_training/model_main_tf2.py +0 -114
  40. megadetector/utils/torch_test.py +0 -32
  41. {megadetector-5.0.23.dist-info → megadetector-5.0.25.dist-info}/LICENSE +0 -0
  42. {megadetector-5.0.23.dist-info → megadetector-5.0.25.dist-info}/top_level.txt +0 -0
@@ -57,7 +57,7 @@ from megadetector.utils.ct_utils import is_iterable, split_list_into_fixed_size_
57
57
  from megadetector.utils.path_utils import path_is_abs
58
58
  from megadetector.data_management import yolo_output_to_md_output
59
59
  from megadetector.detection.run_detector import try_download_known_detector
60
- from megadetector.postprocessing.combine_api_outputs import combine_api_output_files
60
+ from megadetector.postprocessing.combine_batch_outputs import combine_batch_output_files
61
61
 
62
62
  default_image_size_with_augmentation = int(1280 * 1.3)
63
63
  default_image_size_with_no_augmentation = 1280
@@ -214,6 +214,64 @@ def _clean_up_temporary_folders(options,
214
214
  print('Warning: using temporary YOLO results folder {}, but not removing it'.format(
215
215
  yolo_results_folder))
216
216
 
217
+
218
def get_stats_for_category(filename,category='all'):
    """
    Retrieve statistics for a category from the YOLO console output
    stored in [filename].

    Only the first results row whose first token is exactly [category] and that has
    exactly seven whitespace-separated fields is used.  Rows for other categories that
    merely share a prefix with [category] (e.g. "cattle" when asking for "cat") are
    skipped.

    Args:
        filename (str): a text file containing console output from a YOLO val run
        category (optional, str): a category name

    Returns:
        dict: a dict with fields category, n_images, n_labels, P, R, mAP50, and mAP50-95,
        or None if no results row was found for [category]
    """

    with open(filename,'r',encoding='utf-8') as f:
        lines = f.readlines()

    # This is just a hedge to make sure there isn't some YOLO version floating
    # around that used different IoU thresholds in the console output.
    found_map50 = False
    found_map5095 = False

    for line in lines:

        s = line.strip()
        s_lower = s.lower()

        if ' map50 ' in s_lower or ' map@.5 ' in s_lower:
            found_map50 = True
        if 'map50-95' in s_lower or 'map@.5:.95' in s_lower:
            found_map5095 = True

        # Split on arbitrary runs of whitespace; a results row looks like:
        #
        # [category] [n_images] [n_labels] [P] [R] [mAP50] [mAP50-95]
        tokens = s.split()

        # Require an exact match on the category token, not just a shared prefix
        if len(tokens) != 7 or tokens[0] != category:
            continue

        # The column header should always appear before the first results row
        assert found_map50 and found_map5095, \
            'Parsing error in YOLO console output file {}'.format(filename)

        to_return = {}
        to_return['category'] = category
        to_return['n_images'] = int(tokens[1])
        to_return['n_labels'] = int(tokens[2])
        to_return['P'] = float(tokens[3])
        to_return['R'] = float(tokens[4])
        to_return['mAP50'] = float(tokens[5])
        to_return['mAP50-95'] = float(tokens[6])
        return to_return

    # ...for each line

    return None
274
+
217
275
 
218
276
  #%% Main function
219
277
 
@@ -478,7 +536,7 @@ def run_inference_with_yolo_val(options):
478
536
  # ...for each chunk
479
537
 
480
538
  # Merge
481
- _ = combine_api_output_files(input_files=chunk_output_files,
539
+ _ = combine_batch_output_files(input_files=chunk_output_files,
482
540
  output_file=options.output_file,
483
541
  require_uniqueness=True,
484
542
  verbose=True)
@@ -644,8 +702,7 @@ def run_inference_with_yolo_val(options):
644
702
  assert len(category_ids) == 1 + category_ids[-1]
645
703
 
646
704
  yolo_dataset_file = os.path.join(yolo_results_folder,'dataset.yaml')
647
- yolo_image_list_file = os.path.join(yolo_results_folder,'images.txt')
648
-
705
+ yolo_image_list_file = os.path.join(yolo_results_folder,'images.txt')
649
706
 
650
707
  with open(yolo_image_list_file,'w') as f:
651
708
 
@@ -36,10 +36,15 @@ class TFDetector:
36
36
  BATCH_SIZE = 1
37
37
 
38
38
 
39
- def __init__(self, model_path):
39
+ def __init__(self, model_path, detector_options=None):
40
40
  """
41
41
  Loads a model from [model_path] and starts a tf.Session with this graph. Obtains
42
42
  input and output tensor handles.
43
+
44
+ Args:
45
+ model_path (str): path to .pb file
46
+ detector_options (dict, optional): key-value pairs that control detector
47
+ options; currently not used by TFDetector
43
48
  """
44
49
 
45
50
  detection_graph = TFDetector.__load_model(model_path)
@@ -1,8 +1,8 @@
1
1
  """
2
2
 
3
- combine_api_outputs.py
3
+ combine_batch_outputs.py
4
4
 
5
- Merges two or more .json files in batch API output format, optionally
5
+ Merges two or more .json files in MD output format, optionally
6
6
  writing the results to another .json file.
7
7
 
8
8
  * Concatenates image lists, erroring if images are not unique.
@@ -15,10 +15,7 @@ https://github.com/agentmorris/MegaDetector/tree/main/megadetector/api/batch_pro
15
15
 
16
16
  Command-line use:
17
17
 
18
- combine_api_outputs input1.json input2.json ... inputN.json output.json
19
-
20
- Also see combine_api_shard_files() (not exposed via the command line yet) to
21
- combine the intermediate files created by the API.
18
+ combine_batch_outputs input1.json input2.json ... inputN.json output.json
22
19
 
23
20
  This does no checking for redundancy; if you are looking to ensemble
24
21
  the results of multiple model versions, see merge_detections.py.
@@ -34,7 +31,7 @@ import json
34
31
 
35
32
  #%% Merge functions
36
33
 
37
- def combine_api_output_files(input_files,
34
+ def combine_batch_output_files(input_files,
38
35
  output_file=None,
39
36
  require_uniqueness=True,
40
37
  verbose=True):
@@ -64,7 +61,7 @@ def combine_api_output_files(input_files,
64
61
  input_dicts.append(json.load(f))
65
62
 
66
63
  print_if_verbose('Merging results')
67
- merged_dict = combine_api_output_dictionaries(
64
+ merged_dict = combine_batch_output_dictionaries(
68
65
  input_dicts, require_uniqueness=require_uniqueness)
69
66
 
70
67
  print_if_verbose('Writing output to {}'.format(output_file))
@@ -75,7 +72,7 @@ def combine_api_output_files(input_files,
75
72
  return merged_dict
76
73
 
77
74
 
78
- def combine_api_output_dictionaries(input_dicts, require_uniqueness=True):
75
+ def combine_batch_output_dictionaries(input_dicts, require_uniqueness=True):
79
76
  """
80
77
  Merges the list of MD results dictionaries [input_dicts] into a single dict.
81
78
  See module header comment for details on merge rules.
@@ -106,7 +103,7 @@ def combine_api_output_dictionaries(input_dicts, require_uniqueness=True):
106
103
 
107
104
  for k in input_dict:
108
105
  if k not in known_fields:
109
- raise ValueError(f'Unrecognized API output field: {k}')
106
+ print(f'Warning: unrecognized batch output field: {k}')
110
107
 
111
108
  # Check compatibility of detection categories
112
109
  for cat_id in input_dict['detection_categories']:
@@ -157,7 +154,7 @@ def combine_api_output_dictionaries(input_dicts, require_uniqueness=True):
157
154
  assert info_compare['detector'] == info['detector'], (
158
155
  'Incompatible detection versions in merging')
159
156
  assert info_compare['format_version'] == info['format_version'], (
160
- 'Incompatible API output versions in merging')
157
+ 'Incompatible batch output versions in merging')
161
158
  if 'classifier' in info_compare:
162
159
  if 'classifier' in info:
163
160
  assert info['classifier'] == info_compare['classifier']
@@ -179,7 +176,7 @@ def combine_api_output_dictionaries(input_dicts, require_uniqueness=True):
179
176
  'images': sorted_images}
180
177
  return merged_dict
181
178
 
182
- # ...combine_api_output_files()
179
+ # ...combine_batch_output_files()
183
180
 
184
181
 
185
182
  def combine_api_shard_files(input_files, output_file=None):
@@ -243,7 +240,7 @@ def main():
243
240
  parser.exit()
244
241
 
245
242
  args = parser.parse_args()
246
- combine_api_output_files(args.input_paths, args.output_path)
243
+ combine_batch_output_files(args.input_paths, args.output_path)
247
244
 
248
245
  if __name__ == '__main__':
249
246
  main()
@@ -138,6 +138,9 @@ class BatchComparisonOptions:
138
138
  #: List of filenames to include in the comparison, or None to use all files
139
139
  self.filenames_to_include = None
140
140
 
141
+ #: List of category names to include in the comparison, or None to use all categories
142
+ self.category_names_to_include = None
143
+
141
144
  #: Compare only detections/non-detections, ignore categories (still renders categories)
142
145
  self.class_agnostic_comparison = False
143
146
 
@@ -197,6 +200,10 @@ class BatchComparisonOptions:
197
200
  #: to describe images
198
201
  self.fn_to_display_fn = None
199
202
 
203
+ #: Should we run urllib.parse.quote() on paths before using them as links in the
204
+ #: output page?
205
+ self.parse_link_paths = True
206
+
200
207
  # ...class BatchComparisonOptions
201
208
 
202
209
 
@@ -982,7 +989,32 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
982
989
  if invalid_category_error:
983
990
 
984
991
  continue
985
-
992
+
993
+ # Should we be restricting the comparison to only certain categories?
994
+ if options.category_names_to_include is not None:
995
+
996
+ # Just in case the user provided a single category instead of a list
997
+ if isinstance(options.category_names_to_include,str):
998
+ options.category_names_to_include = [options.category_names_to_include]
999
+
1000
+ category_name_to_id_a = invert_dictionary(detection_categories_a)
1001
+ category_name_to_id_b = invert_dictionary(detection_categories_b)
1002
+ category_ids_to_include_a = []
1003
+ category_ids_to_include_b = []
1004
+
1005
+ for category_name in options.category_names_to_include:
1006
+ if category_name in category_name_to_id_a:
1007
+ category_ids_to_include_a.append(category_name_to_id_a[category_name])
1008
+ if category_name in category_name_to_id_b:
1009
+ category_ids_to_include_b.append(category_name_to_id_b[category_name])
1010
+
1011
+ # Restrict the categories we treat as above-threshold to the set we're supposed
1012
+ # to be using
1013
+ categories_above_threshold_a = [category_id for category_id in categories_above_threshold_a if \
1014
+ category_id in category_ids_to_include_a]
1015
+ categories_above_threshold_b = [category_id for category_id in categories_above_threshold_b if \
1016
+ category_id in category_ids_to_include_b]
1017
+
986
1018
  detection_a = (len(categories_above_threshold_a) > 0)
987
1019
  detection_b = (len(categories_above_threshold_b) > 0)
988
1020
 
@@ -1213,9 +1245,6 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
1213
1245
 
1214
1246
  # ...def _categorize_image_with_image_level_gt(...)
1215
1247
 
1216
- # if 'val#human#human#HoSa#2021.006_na#2021#2021.006 (2021)#20210713' in im_a['file']:
1217
- # import pdb; pdb.set_trace()
1218
-
1219
1248
  # im_detection = im_a; category_id_to_threshold = category_id_to_threshold_a
1220
1249
  result_types_present_a = \
1221
1250
  _categorize_image_with_image_level_gt(im_a,im_gt,annotations_gt,category_id_to_threshold_a)
@@ -1360,12 +1389,17 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
1360
1389
 
1361
1390
  title = display_path + ' (max conf {:.2f},{:.2f})'.format(max_conf_a,max_conf_b)
1362
1391
 
1392
+ if options.parse_link_paths:
1393
+ link_target_string = urllib.parse.quote(input_image_absolute_paths[i_fn])
1394
+ else:
1395
+ link_target_string = input_image_absolute_paths[i_fn]
1396
+
1363
1397
  info = {
1364
1398
  'filename': fn,
1365
1399
  'title': title,
1366
1400
  'textStyle': 'font-family:verdana,arial,calibri;font-size:' + \
1367
1401
  '80%;text-align:left;margin-top:20;margin-bottom:5',
1368
- 'linkTarget': urllib.parse.quote(input_image_absolute_paths[i_fn]),
1402
+ 'linkTarget': link_target_string,
1369
1403
  'sort_conf':sort_conf
1370
1404
  }
1371
1405
 
@@ -1575,7 +1609,9 @@ def n_way_comparison(filenames,
1575
1609
  if model_names is not None:
1576
1610
  assert len(model_names) == len(filenames), \
1577
1611
  '[model_names] should be the same length as [filenames]'
1578
-
1612
+
1613
+ options.pairwise_options = []
1614
+
1579
1615
  # Choose all pairwise combinations of the files in [filenames]
1580
1616
  for i, j in itertools.combinations(list(range(0,len(filenames))),2):
1581
1617
 
@@ -1598,7 +1634,200 @@ def n_way_comparison(filenames,
1598
1634
 
1599
1635
  return compare_batch_results(options)
1600
1636
 
1601
- # ...n_way_comparison()
1637
+ # ...def n_way_comparison(...)
1638
+
1639
+
1640
def find_image_level_detections_above_threshold(results,threshold=0.2,category_names=None):
    """
    Returns images in the set of MD results [results] with detections above
    a threshold confidence level, optionally only counting certain categories.

    Images with no "detections" field, a null "detections" field, or an empty
    detection list are never returned.

    Args:
        results (str or dict): the set of results, either a .json filename or a results
            dict
        threshold (float, optional): the minimum confidence a detection needs for its image
            to be included in the returned list
        category_names (list or str, optional): the list of category names to consider (defaults
            to using all categories), or the name of a single category.

    Returns:
        list: the images (the original dicts from results['images']) with above-threshold
        detections

    Raises:
        KeyError: if a name in [category_names] is not a detection category in [results]
    """

    if isinstance(results,str):
        with open(results,'r') as f:
            results = json.load(f)

    category_ids_to_consider = None

    if category_names is not None:

        # Just in case the caller provided a single category instead of a list
        if isinstance(category_names,str):
            category_names = [category_names]

        category_id_to_name = results['detection_categories']
        category_name_to_id = invert_dictionary(category_id_to_name)

        # Use a set, since we test membership once per detection
        category_ids_to_consider = set()

        for category_name in category_names:
            if category_name not in category_name_to_id:
                raise KeyError(
                    'Category {} is not present in the results file'.format(category_name))
            category_ids_to_consider.add(category_name_to_id[category_name])

        assert len(category_ids_to_consider) > 0, \
            'Category name list did not map to any category IDs'

    images_above_threshold = []

    for im in results['images']:

        detections = im.get('detections',None)

        # Skip failed images and images with no detections at all
        if not detections:
            continue

        # Start at zero, so an image whose detections all belong to excluded categories
        # is treated as having a maximum confidence of zero.
        max_conf_this_image = 0

        for det in detections:
            if (category_ids_to_consider is not None) and \
                (det['category'] not in category_ids_to_consider):
                continue
            max_conf_this_image = max(max_conf_this_image,det['conf'])

        if max_conf_this_image >= threshold:
            images_above_threshold.append(im)

    # ...for each image

    return images_above_threshold
1697
+
1698
+ # ...def find_image_level_detections_above_threshold(...)
1699
+
1700
+
1701
def find_equivalent_threshold(results_a,
                              results_b,
                              threshold_a=0.2,
                              category_names=None,
                              verbose=False):
    """
    Given two sets of detector results, finds the confidence threshold for results_b
    that produces (approximately) the same fraction of *images* with detections as
    threshold_a does for results_a.  Uses all categories unless [category_names] is
    supplied.

    Images without a valid "detections" list (i.e., failed images) are ignored in both
    result sets.  Images with no detections in the categories of interest count as having
    a maximum confidence of zero.

    Args:
        results_a (str or dict): the first set of results, either a .json filename or a results
            dict
        results_b (str or dict): the second set of results, either a .json filename or a results
            dict
        threshold_a (float, optional): the threshold used to determine the target number of
            detections in results_a
        category_names (list or str, optional): the list of category names to consider (defaults
            to using all categories), or the name of a single category.
        verbose (bool, optional): enable additional debug output

    Returns:
        float: the threshold that - when applied to results_b - produces the same fraction
        of image-level detections that results from applying threshold_a to results_a.  The
        returned value is always one of the per-image maximum confidence values in results_b;
        if no images in results_a are above threshold_a, the highest such value is returned.

    Raises:
        ValueError: if either result set contains no valid (non-failed) images
        KeyError: if a name in [category_names] is missing from either result set
    """

    def _load_results_if_necessary(results):
        """
        Load [results] from a .json file if it's a filename; otherwise return it unmodified.
        """

        if isinstance(results,str):
            if verbose:
                print('Loading results from {}'.format(results))
            with open(results,'r') as f:
                results = json.load(f)
        return results

    results_a = _load_results_if_necessary(results_a)
    results_b = _load_results_if_necessary(results_b)

    category_ids_to_consider_a = None
    category_ids_to_consider_b = None

    if category_names is not None:

        # Just in case the caller provided a single category instead of a list
        if isinstance(category_names,str):
            category_names = [category_names]

        # The two result sets may use different IDs for the same category name, so
        # map names to IDs separately for each set.
        category_name_to_id_a = invert_dictionary(results_a['detection_categories'])
        category_name_to_id_b = invert_dictionary(results_b['detection_categories'])

        category_ids_to_consider_a = []
        category_ids_to_consider_b = []

        for category_name in category_names:
            category_ids_to_consider_a.append(category_name_to_id_a[category_name])
            category_ids_to_consider_b.append(category_name_to_id_b[category_name])

        assert len(category_ids_to_consider_a) > 0 and len(category_ids_to_consider_b) > 0, \
            'Category name list did not map to any category IDs in one or both detection sets'

    def _get_confidence_values_for_results(images,category_ids_to_consider):
        """
        Return a list of the maximum confidence value for each image in [images].
        Returns zero confidence for images with no detections (or no detections
        in the specified categories).  Does not return anything for invalid images.
        """

        confidence_values = []

        for im in images:

            detections = im.get('detections',None)

            # Failed images don't contribute to the distribution
            if detections is None:
                continue

            confidence_values_this_image = [
                det['conf'] for det in detections if \
                (category_ids_to_consider is None) or \
                (det['category'] in category_ids_to_consider)
            ]
            confidence_values.append(max(confidence_values_this_image,default=0))

        # ...for each image

        return confidence_values

    # ...def _get_confidence_values_for_results(...)

    confidence_values_a = \
        _get_confidence_values_for_results(results_a['images'],category_ids_to_consider_a)

    if verbose:
        print('For result set A, considering {} of {} images'.format(
            len(confidence_values_a),len(results_a['images'])))

    confidence_values_b = \
        _get_confidence_values_for_results(results_b['images'],category_ids_to_consider_b)

    if verbose:
        print('For result set B, considering {} of {} images'.format(
            len(confidence_values_b),len(results_b['images'])))

    if len(confidence_values_a) == 0:
        raise ValueError('Result set A contains no valid images')
    if len(confidence_values_b) == 0:
        raise ValueError('Result set B contains no valid images')

    confidence_values_a_above_threshold = [c for c in confidence_values_a if c >= threshold_a]
    confidence_values_b = sorted(confidence_values_b)

    target_detection_fraction = len(confidence_values_a_above_threshold) / len(confidence_values_a)

    detection_cutoff_index = round((1.0-target_detection_fraction) * len(confidence_values_b))

    # If (almost) nothing in A is above threshold, the cutoff index lands one past the
    # end of the list; clamp it so we return the highest confidence value in B rather than
    # raising an IndexError.
    detection_cutoff_index = min(detection_cutoff_index,len(confidence_values_b)-1)

    threshold_b = confidence_values_b[detection_cutoff_index]

    if verbose:
        print('{} confidence values above threshold (A)'.format(len(confidence_values_a_above_threshold)))
        confidence_values_b_above_threshold = [c for c in confidence_values_b if c >= threshold_b]
        print('{} confidence values above threshold (B)'.format(len(confidence_values_b_above_threshold)))

    return threshold_b
1829
+
1830
+ # ...def find_equivalent_threshold(...)
1602
1831
 
1603
1832
 
1604
1833
  #%% Interactive driver