megadetector 5.0.21__py3-none-any.whl → 5.0.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (37)
  1. megadetector/data_management/cct_json_utils.py +143 -7
  2. megadetector/data_management/cct_to_md.py +12 -5
  3. megadetector/data_management/databases/integrity_check_json_db.py +83 -77
  4. megadetector/data_management/importers/raic_csv_to_md_results.py +416 -0
  5. megadetector/data_management/importers/zamba_results_to_md_results.py +1 -2
  6. megadetector/data_management/lila/create_lila_test_set.py +25 -11
  7. megadetector/data_management/lila/download_lila_subset.py +9 -2
  8. megadetector/data_management/lila/generate_lila_per_image_labels.py +3 -2
  9. megadetector/data_management/lila/test_lila_metadata_urls.py +5 -1
  10. megadetector/data_management/read_exif.py +10 -14
  11. megadetector/data_management/rename_images.py +1 -1
  12. megadetector/detection/process_video.py +14 -3
  13. megadetector/detection/pytorch_detector.py +15 -3
  14. megadetector/detection/run_detector.py +4 -3
  15. megadetector/detection/run_detector_batch.py +2 -2
  16. megadetector/detection/run_inference_with_yolov5_val.py +121 -13
  17. megadetector/detection/video_utils.py +21 -10
  18. megadetector/postprocessing/classification_postprocessing.py +1 -1
  19. megadetector/postprocessing/compare_batch_results.py +931 -142
  20. megadetector/postprocessing/detector_calibration.py +243 -45
  21. megadetector/postprocessing/md_to_coco.py +85 -20
  22. megadetector/postprocessing/postprocess_batch_results.py +0 -1
  23. megadetector/postprocessing/validate_batch_results.py +65 -15
  24. megadetector/taxonomy_mapping/map_new_lila_datasets.py +15 -12
  25. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +1 -1
  26. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +3 -1
  27. megadetector/utils/ct_utils.py +71 -14
  28. megadetector/utils/md_tests.py +9 -1
  29. megadetector/utils/path_utils.py +14 -7
  30. megadetector/utils/process_utils.py +9 -3
  31. megadetector/utils/write_html_image_list.py +5 -1
  32. megadetector/visualization/visualization_utils.py +211 -87
  33. {megadetector-5.0.21.dist-info → megadetector-5.0.23.dist-info}/METADATA +19 -18
  34. {megadetector-5.0.21.dist-info → megadetector-5.0.23.dist-info}/RECORD +37 -36
  35. {megadetector-5.0.21.dist-info → megadetector-5.0.23.dist-info}/WHEEL +1 -1
  36. {megadetector-5.0.21.dist-info → megadetector-5.0.23.dist-info}/LICENSE +0 -0
  37. {megadetector-5.0.21.dist-info → megadetector-5.0.23.dist-info}/top_level.txt +0 -0
megadetector/postprocessing/validate_batch_results.py

@@ -15,8 +15,10 @@ import sys
 import json
 import argparse
 
+from tqdm import tqdm
+
 from megadetector.detection.video_utils import is_video_file
-from megadetector.utils.ct_utils import args_to_object
+from megadetector.utils.ct_utils import args_to_object, is_list_sorted # noqa
 
 typical_info_fields = ['detector','detection_completion_time',
                        'classifier','classification_completion_time',
@@ -45,6 +47,9 @@ class ValidateBatchResultsOptions:
 
         #: Should we return the loaded data, or just the validation results?
         self.return_data = False
+
+        #: Enable additional debug output
+        self.verbose = False
 
 # ...class ValidateBatchResultsOptions
 
@@ -73,6 +78,9 @@ def validate_batch_results(json_filename,options=None):
     if options is None:
         options = ValidateBatchResultsOptions()
 
+    if options.verbose:
+        print('Loading results from {}'.format(json_filename))
+
     with open(json_filename,'r') as f:
         d = json.load(f)
 
@@ -140,8 +148,11 @@
     if not isinstance(d['images'],list):
         raise ValueError('Invalid images field')
 
+    if options.verbose:
+        print('Validating images')
+
     # im = d['images'][0]
-    for i_im,im in enumerate(d['images']):
+    for i_im,im in tqdm(enumerate(d['images']),total=len(d['images']),disable=(not options.verbose)):
 
         if not isinstance(im,dict):
             raise ValueError('Invalid image at index {}'.format(i_im))
@@ -150,34 +161,61 @@
 
         file = im['file']
 
+        if 'detections' in im and im['detections'] is not None:
+            for det in im['detections']:
+                assert 'category' in det, 'Image {} has a detection with no category'.format(file)
+                assert 'conf' in det, 'Image {} has a detection with no confidence'.format(file)
+                assert isinstance(det['conf'],float), \
+                    'Image {} has an illegal confidence value'.format(file)
+                assert 'bbox' in det, 'Image {} has a detection with no box'.format(file)
+                assert det['category'] in d['detection_categories'], \
+                    'Image {} has a detection with an unmapped category {}'.format(
+                        file,det['category'])
+
         if options.check_image_existence:
+
             if options.relative_path_base is None:
                 file_abs = file
             else:
                 file_abs = os.path.join(options.relative_path_base,file)
             if not os.path.isfile(file_abs):
                 raise ValueError('Cannot find file {}'.format(file_abs))
-
-        if ('detections' not in im) or (im['detections'] is None):
-            if not ('failure' in im and isinstance(im['failure'],str)):
-                raise ValueError('Image {} has no detections and no failure'.format(im['file']))
+
+        if 'failure' in im:
+            if im['failure'] is not None:
+                if not isinstance(im['failure'],str):
+                    raise ValueError('Image {} has an illegal [failure] value: {}'.format(
+                        im['file'],str(im['failure'])))
+                if 'detections' not in im:
+                    s = 'Image {} has a failure value, should also have a null detections array'.format(
+                        im['file'])
+                    validation_results['warnings'].append(s)
+                elif im['detections'] is not None:
+                    raise ValueError('Image {} has a failure value but a non-null detections array'.format(
+                        im['file']))
         else:
             if not isinstance(im['detections'],list):
                 raise ValueError('Invalid detections list for image {}'.format(im['file']))
-
+
         if is_video_file(im['file']) and (format_version >= 1.4):
+
             if 'frame_rate' not in im:
                 raise ValueError('Video without frame rate: {}'.format(im['file']))
+            if im['frame_rate'] < 0:
+                raise ValueError('Video with illegal frame rate {}: {}'.format(
+                    str(im['frame_rate']),im['file']))
             if 'detections' in im and im['detections'] is not None:
                 for det in im['detections']:
                     if 'frame_number' not in det:
                         raise ValueError('Frame without frame number in video {}'.format(
                             im['file']))
+                frame_numbers = [det['frame_number'] for det in im['detections']] # noqa
+                # assert is_list_sorted(frame_numbers)
 
     # ...for each image
 
 
-    ## Checking on other keys
+    ## Validation of other keys
 
     for k in d.keys():
         if (k not in typical_keys) and (k not in required_keys):
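The stricter per-image rules introduced here are easiest to see with a few hypothetical records (the filenames, failure strings, and category IDs below are illustrative, not taken from this diff):

# Hypothetical records illustrating the 5.0.23 consistency rules

# Valid: each detection needs a 'category' (mapped in detection_categories),
# a float 'conf', and a 'bbox'
ok_image = {'file': 'a.jpg',
            'detections': [{'category': '1', 'conf': 0.9, 'bbox': [0.0, 0.0, 0.5, 0.5]}]}

# Valid: a failure string paired with a null detections array
ok_failure = {'file': 'b.jpg', 'failure': 'image access', 'detections': None}

# Warning only: a failure value with no detections field at all
warn_failure = {'file': 'c.jpg', 'failure': 'image access'}

# ValueError: a failure value alongside a non-null detections array
bad_failure = {'file': 'd.jpg', 'failure': 'image access', 'detections': []}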
@@ -188,6 +226,8 @@ def validate_batch_results(json_filename,options=None):
 
         validation_results['errors'].append(str(e))
 
+    # ...try/except
+
     if options.return_data:
         to_return = d
     else:
@@ -204,15 +244,25 @@
 
 if False:
 
-    #%%
+    #%% Validate all .json files in the MD test suite
+
+    from megadetector.utils.path_utils import recursive_file_list
+    filenames = recursive_file_list(os.path.expanduser('~/AppData/Local/Temp/md-tests'))
+    filenames = [fn for fn in filenames if fn.endswith('.json')]
+    filenames = [fn for fn in filenames if 'detectionIndex' not in fn]
 
     options = ValidateBatchResultsOptions()
-    # json_filename = r'g:\temp\format.json'
-    # json_filename = r'g:\temp\test-videos\video_results.json'
-    json_filename = r'g:\temp\test-videos\image_results.json'
-    options.check_image_existence = True
-    options.relative_path_base = r'g:\temp\test-videos'
-    validate_batch_results(json_filename,options)
+    options.check_image_existence = False
+    options.relative_path_base = None # r'g:\temp\test-videos'
+
+    for json_filename in filenames:
+        results = validate_batch_results(json_filename,options)
+        if len(results['validation_results']['warnings']) > 0:
+            print('Warnings in file {}:'.format(json_filename))
+            for s in results['validation_results']['warnings']:
+                print(s)
+            print('')
+        assert len(results['validation_results']['errors']) == 0
 
 
 #%% Command-line driver
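A minimal sketch of the new verbose option in use; the results path is a placeholder, and the return structure follows the interactive driver above:

from megadetector.postprocessing.validate_batch_results import \
    ValidateBatchResultsOptions, validate_batch_results

options = ValidateBatchResultsOptions()
options.verbose = True  # new in 5.0.23: status output plus a tqdm progress bar
results = validate_batch_results('md_results.json', options)  # placeholder path

for s in results['validation_results']['warnings']:
    print(s)
assert len(results['validation_results']['errors']) == 0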
megadetector/taxonomy_mapping/map_new_lila_datasets.py

@@ -15,15 +15,17 @@ import json
 # Created by get_lila_category_list.py
 input_lila_category_list_file = os.path.expanduser('~/lila/lila_categories_list/lila_dataset_to_categories.json')
 
-output_file = os.path.expanduser('~/lila/lila_additions_2024.10.05.csv')
+output_file = os.path.expanduser('~/lila/lila_additions_2024.12.31.csv')
 
 datasets_to_map = [
-    'Ohio Small Animals'
+    'Seattle(ish) Camera Traps'
     ]
 
 
 #%% Initialize taxonomic lookup
 
+# Takes ~2 mins
+
 from megadetector.taxonomy_mapping.species_lookup import \
     initialize_taxonomy_lookup, get_preferred_taxonomic_match
 
@@ -39,27 +41,27 @@ lila_datasets = set()
 
 for dataset_name in input_lila_categories.keys():
     # The script that generates this dictionary creates a separate entry for bounding box
-    # metadata files, but those don't represent new dataset names
+    # metadata files, but those don't represent new dataset names, so we ignore them here.
     lila_datasets.add(dataset_name.replace('_bbox',''))
-
+
 for s in datasets_to_map:
     assert s in lila_datasets
-
-
+
+
 #%% Find all categories
 
 category_mappings = []
 
 # dataset_name = datasets_to_map[0]
 for dataset_name in datasets_to_map:
-
+
     ds_categories = input_lila_categories[dataset_name]
     for category in ds_categories:
         category_name = category['name']
         assert ':' not in category_name
         mapping_name = dataset_name + ':' + category_name
         category_mappings.append(mapping_name)
-
+
 print('Need to create {} mappings'.format(len(category_mappings)))
 
 
@@ -128,22 +130,23 @@ output_df.to_csv(output_file, index=None, header=True)
 
 if False:
 
-    #%%
-
+    #%% You probably want to open the .csv file first
+
     from megadetector.utils.path_utils import open_file
     open_file(output_file)
+
 
     #%%
 
     # q = 'white-throated monkey'
     # q = 'cingulata'
     # q = 'notamacropus'
-    q = 'thamnophis saurita saurita'
+    q = 'insects'
     taxonomy_preference = 'inat'
     m = get_preferred_taxonomic_match(q,taxonomy_preference)
     # print(m.scientific_name); import clipboard; clipboard.copy(m.scientific_name)
 
-    if m is None:
+    if (m is None) or (len(m.taxonomy_string) == 0):
         print('No match')
     else:
         if m.source != taxonomy_preference:
megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py

@@ -89,7 +89,7 @@ if False:
                         'genus',
                         'species','subspecies','variety']
 
-    levels_to_exclude = ['stateofmatter','zoosection','parvorder','complex']
+    levels_to_exclude = ['stateofmatter','zoosection','parvorder','complex','epifamily']
 
     for s in levels_to_exclude:
        assert s not in levels_to_include
megadetector/taxonomy_mapping/preview_lila_taxonomy.py

@@ -16,7 +16,7 @@ import os
 import pandas as pd
 
 # lila_taxonomy_file = r"c:\git\agentmorrisprivate\lila-taxonomy\lila-taxonomy-mapping.csv"
-lila_taxonomy_file = os.path.expanduser('~/lila/lila_additions_2024.10.05.csv')
+lila_taxonomy_file = os.path.expanduser('~/lila/lila_additions_2024.12.31.csv')
 
 preview_base = os.path.expanduser('~/lila/lila_taxonomy_preview')
 os.makedirs(preview_base,exist_ok=True)
@@ -399,6 +399,8 @@ images_per_query = 15
 min_valid_images_per_query = 3
 min_valid_image_size = 3000
 
+# TODO: parallelize this loop
+#
 # i_row = 0; row = df.iloc[i_row]
 for i_row,row in df.iterrows():
 
megadetector/utils/ct_utils.py

@@ -12,6 +12,7 @@ import inspect
 import json
 import math
 import os
+import builtins
 
 import jsonpickle
 import numpy as np
@@ -547,7 +548,7 @@ def image_file_to_camera_folder(image_fn):
     # 100EK113 is (for some reason) the overflow folder style for Bushnell cameras
     # 100_BTCF is the overflow folder style for Browning cameras
     # 100MEDIA is the overflow folder style used on a number of consumer-grade cameras
-    patterns = ['\/\d+RECNX\/','\/\d+EK\d+\/','\/\d+_BTCF\/','\/\d+MEDIA\/']
+    patterns = [r'/\d+RECNX/',r'/\d+EK\d+/',r'/\d+_BTCF/',r'/\d+MEDIA/']
 
     image_fn = image_fn.replace('\\','/')
     for pat in patterns:
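The raw-string change is behavior-preserving: '\/' and r'/' describe the same regex, but only the raw string avoids Python's invalid-escape-sequence warnings. A quick illustration:

import re

# Both patterns match the same overflow-folder paths, but the raw string
# doesn't trigger DeprecationWarning/SyntaxWarning on recent Python versions
assert re.search(r'/\d+RECNX/', 'a/b/c/d/100RECNX/blah.jpg') is not None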
@@ -613,6 +614,50 @@ def is_empty(v):
     return False
 
 
+def min_none(a,b):
+    """
+    Returns the minimum of a and b. If both are None, returns None. If one is None,
+    returns the other.
+
+    Args:
+        a (numeric): the first value to compare
+        b (numeric): the second value to compare
+
+    Returns:
+        numeric: the minimum of a and b, or None
+    """
+    if a is None and b is None:
+        return None
+    elif a is None:
+        return b
+    elif b is None:
+        return a
+    else:
+        return min(a,b)
+
+
+def max_none(a,b):
+    """
+    Returns the maximum of a and b. If both are None, returns None. If one is None,
+    returns the other.
+
+    Args:
+        a (numeric): the first value to compare
+        b (numeric): the second value to compare
+
+    Returns:
+        numeric: the maximum of a and b, or None
+    """
+    if a is None and b is None:
+        return None
+    elif a is None:
+        return b
+    elif b is None:
+        return a
+    else:
+        return max(a,b)
+
+
 def isnan(v):
     """
     Returns True if v is a nan-valued float, otherwise returns False.
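The new helpers behave like min/max with None treated as missing, e.g.:

from megadetector.utils.ct_utils import min_none, max_none

assert min_none(None, None) is None
assert min_none(None, 3) == 3
assert min_none(2, 3) == 2
assert max_none(2, None) == 2
assert max_none(2, 3) == 3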
@@ -645,23 +690,36 @@ def sets_overlap(set1, set2):
     return not set(set1).isdisjoint(set(set2))
 
 
-
-#%% Test drivers
-
-if False:
+def is_function_name(s,calling_namespace):
+    """
+    Determines whether [s] is a callable function in the global or local scope, or a
+    built-in function.
 
-    pass
+    Args:
+        s (str): the string to test for function-ness
+        calling_namespace (dict): typically pass the output of locals()
+    """
+
+    assert isinstance(s,str), 'Input is not a string'
 
-    #%% Test image_file_to_camera_folder()
+    return callable(globals().get(s)) or \
+        callable(locals().get(s)) or \
+        callable(calling_namespace.get(s)) or \
+        callable(getattr(builtins, s, None))
+
+
+def __module_test__():
+    """
+    Module test driver
+    """
 
-    relative_path = 'a/b/c/d/100EK113/blah.jpg'
-    print(image_file_to_camera_folder(relative_path))
+    ##%% Camera folder mapping
 
-    relative_path = 'a/b/c/d/100RECNX/blah.jpg'
-    print(image_file_to_camera_folder(relative_path))
+    assert image_file_to_camera_folder('a/b/c/d/100EK113/blah.jpg') == 'a/b/c/d'
+    assert image_file_to_camera_folder('a/b/c/d/100RECNX/blah.jpg') == 'a/b/c/d'
 
 
-    #%% Test a few rectangle distances
+    ##%% Test a few rectangle distances
 
     r1 = [0,0,1,1]; r2 = [0,0,1,1]; assert rect_distance(r1,r2)==0
     r1 = [0,0,1,1]; r2 = [0,0,1,100]; assert rect_distance(r1,r2)==0
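A sketch of how is_function_name is intended to be called, passing locals() so the caller's namespace is searched (the helper name below is hypothetical):

from megadetector.utils.ct_utils import is_function_name

def my_helper():
    pass

assert is_function_name('my_helper', locals())  # found via the passed namespace
assert is_function_name('print', locals())      # found via builtins
assert not is_function_name('no_such_function', locals())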
@@ -673,9 +731,8 @@ if False:
     r1 = [0.4,0.8,10,22]; r2 = [120, 120, 200, 210.4]; assert abs(rect_distance(r1,r2)-147.323) < 0.001
 
 
-    #%% Test dictionary sorting
+    ##%% Test dictionary sorting
 
     L = [{'a':5},{'a':0},{'a':10}]
     k = 'a'
     sort_list_of_dicts_by_key(L, k, reverse=True)
-
megadetector/utils/md_tests.py

@@ -654,6 +654,14 @@ def run_python_tests(options):
     download_test_data(options)
 
 
+    ## Miscellaneous utility tests
+
+    print('\n** Running ct_utils module test **\n')
+
+    from megadetector.utils.ct_utils import __module_test__ as ct_utils_test
+    ct_utils_test()
+
+
     ## Run inference on an image
 
     print('\n** Running MD on a single image (module) **\n')
@@ -1210,7 +1218,7 @@ def run_cli_tests(options):
     cmd += ' --overwrite_handling overwrite'
     cmd_results = execute_and_print(cmd)
 
-    # Run again with checkpointing, make sure the output are identical
+    # Run again with checkpointing, make sure the outputs are identical
     cmd += ' --checkpoint_frequency 5'
     inference_output_file_yolo_val_checkpoint = \
         os.path.join(options.scratch_dir,'folder_inference_output_yolo_val_checkpoint.json')
megadetector/utils/path_utils.py

@@ -32,6 +32,8 @@ from functools import partial
 from shutil import which
 from tqdm import tqdm
 
+from megadetector.utils.ct_utils import is_iterable
+
 # Should all be lower-case
 IMG_EXTENSIONS = ('.jpg', '.jpeg', '.gif', '.png', '.tif', '.tiff', '.bmp')
 
@@ -770,16 +772,21 @@ def parallel_get_file_sizes(filenames,
 
     folder_name = None
 
-    if verbose:
-        print('Enumerating files')
-
-    if isinstance(filenames,str) and os.path.isdir(filenames):
-
+    if isinstance(filenames,str):
+
         folder_name = filenames
+        assert os.path.isdir(filenames), 'Could not find folder {}'.format(folder_name)
 
+        if verbose:
+            print('Enumerating files in {}'.format(folder_name))
+
         # Enumerate absolute paths here, we'll convert to relative later if requested
-        filenames = recursive_file_list(filenames,recursive=recursive,return_relative_paths=False)
+        filenames = recursive_file_list(folder_name,recursive=recursive,return_relative_paths=False)
 
+    else:
+
+        assert is_iterable(filenames), '[filenames] argument is neither a folder nor an iterable'
+
     if verbose:
         print('Creating worker pool')
 
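With this change, parallel_get_file_sizes accepts either a folder (which it enumerates) or any iterable of filenames; a usage sketch with placeholder paths:

from megadetector.utils.path_utils import parallel_get_file_sizes

# A folder is enumerated first (recursively here), then sizes are read
sizes = parallel_get_file_sizes('/data/camera-traps', recursive=True, verbose=True)

# ...or pass any iterable of filenames; other types now fail the is_iterable assert
sizes = parallel_get_file_sizes(['/data/a.jpg', '/data/b.jpg'])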
@@ -940,7 +947,7 @@ def zip_folder(input_folder, output_fn=None, overwrite=False, verbose=False, com
         output_fn (str, optional): output filename; if this is None, we'll write to [input_folder].zip
         overwrite (bool, optional): whether to overwrite an existing .tar file
         verbose (bool, optional): enable additional debug console output
-        compresslevel (int, optional): compression level to use, between 0 and 9
+        compresslevel (int, optional): compression level to use, between 0 and 9
 
     Returns:
         str: the output zipfile, whether we created it or determined that it already exists
megadetector/utils/process_utils.py

@@ -59,8 +59,13 @@ def execute(cmd,encoding=None,errors=None,env=None,verbose=False):
     return return_code
 
 
-def execute_and_print(cmd,print_output=True,encoding=None,errors=None,
-                      env=None,verbose=False,catch_exceptions=True,
+def execute_and_print(cmd,
+                      print_output=True,
+                      encoding=None,
+                      errors=None,
+                      env=None,
+                      verbose=False,
+                      catch_exceptions=True,
                       echo_command=False):
     """
     Run [cmd] (a single string) in a shell, capturing and printing output. Returns
@@ -73,7 +78,8 @@ def execute_and_print(cmd,print_output=True,encoding=None,errors=None,
 
     Args:
         cmd (str): command to run
-        print_output (bool, optional): whether to print output from [cmd]
+        print_output (bool, optional): whether to print output from [cmd] (stdout is
+            captured regardless of the value of print_output)
         encoding (str, optional): stdout encoding, see Popen() documentation
         errors (str, optional): error handling, see Popen() documentation
         env (dict, optional): environment variables, see Popen() documentation
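A sketch of the clarified print_output semantics; in both calls stdout is captured in the returned value, and only console echoing differs:

from megadetector.utils.process_utils import execute_and_print

# Echoes output while the command runs
cmd_results = execute_and_print('echo hello')

# Runs silently, but stdout is still captured in cmd_results
cmd_results = execute_and_print('echo hello', print_output=False)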
megadetector/utils/write_html_image_list.py

@@ -44,6 +44,7 @@ def write_html_image_list(filename=None,images=None,options=None):
         - fHtml (file pointer to write to, used for splitting write operations over multiple calls)
         - pageTitle (HTML page title)
         - headerHtml (html text to include before the image list)
+        - subPageHeaderHtml (html text to include before the images when images are broken into pages)
         - trailerHtml (html text to include after the image list)
         - defaultImageStyle (default css style for images)
         - defaultTextStyle (default css style for image titles)
@@ -67,6 +68,9 @@ def write_html_image_list(filename=None,images=None,options=None):
     if 'headerHtml' not in options or options['headerHtml'] is None:
         options['headerHtml'] = ''
 
+    if 'subPageHeaderHtml' not in options or options['subPageHeaderHtml'] is None:
+        options['subPageHeaderHtml'] = ''
+
     if 'trailerHtml' not in options or options['trailerHtml'] is None:
         options['trailerHtml'] = ''
 
@@ -152,7 +156,7 @@
         localImages = images[iStart:iEnd+1]
 
         localOptions = options.copy();
-        localOptions['headerHtml'] = '';
+        localOptions['headerHtml'] = options['subPageHeaderHtml'];
         localOptions['trailerHtml'] = '';
 
         # Make a recursive call for this image set
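A sketch of the new subPageHeaderHtml option: when the image list is split across pages, headerHtml precedes the list itself, while subPageHeaderHtml is written at the top of each sub-page (the paths and the pagination option name below are assumptions, not taken from this diff):

from megadetector.utils.write_html_image_list import write_html_image_list

options = {}
options['headerHtml'] = '<h1>Detection previews</h1>'
options['subPageHeaderHtml'] = '<h2>Detection previews (continued)</h2>'
options['maxFiguresPerHtmlFile'] = 500  # assumed pagination option name

write_html_image_list(filename='preview/index.html',
                      images=['images/a.jpg', 'images/b.jpg'],
                      options=options)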