megadetector 5.0.6__py3-none-any.whl → 5.0.8__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.

Files changed (75)
  1. api/batch_processing/data_preparation/manage_local_batch.py +297 -202
  2. api/batch_processing/data_preparation/manage_video_batch.py +7 -2
  3. api/batch_processing/postprocessing/add_max_conf.py +1 -0
  4. api/batch_processing/postprocessing/combine_api_outputs.py +2 -2
  5. api/batch_processing/postprocessing/compare_batch_results.py +111 -61
  6. api/batch_processing/postprocessing/convert_output_format.py +24 -6
  7. api/batch_processing/postprocessing/load_api_results.py +56 -72
  8. api/batch_processing/postprocessing/md_to_labelme.py +119 -51
  9. api/batch_processing/postprocessing/merge_detections.py +30 -5
  10. api/batch_processing/postprocessing/postprocess_batch_results.py +175 -55
  11. api/batch_processing/postprocessing/remap_detection_categories.py +163 -0
  12. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +628 -0
  13. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +71 -23
  14. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +1 -1
  15. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +224 -76
  16. api/batch_processing/postprocessing/subset_json_detector_output.py +132 -5
  17. api/batch_processing/postprocessing/top_folders_to_bottom.py +1 -1
  18. classification/prepare_classification_script.py +191 -191
  19. data_management/cct_json_utils.py +7 -2
  20. data_management/coco_to_labelme.py +263 -0
  21. data_management/coco_to_yolo.py +72 -48
  22. data_management/databases/integrity_check_json_db.py +75 -64
  23. data_management/databases/subset_json_db.py +1 -1
  24. data_management/generate_crops_from_cct.py +1 -1
  25. data_management/get_image_sizes.py +44 -26
  26. data_management/importers/animl_results_to_md_results.py +3 -5
  27. data_management/importers/noaa_seals_2019.py +2 -2
  28. data_management/importers/zamba_results_to_md_results.py +2 -2
  29. data_management/labelme_to_coco.py +264 -127
  30. data_management/labelme_to_yolo.py +96 -53
  31. data_management/lila/create_lila_blank_set.py +557 -0
  32. data_management/lila/create_lila_test_set.py +2 -1
  33. data_management/lila/create_links_to_md_results_files.py +1 -1
  34. data_management/lila/download_lila_subset.py +138 -45
  35. data_management/lila/generate_lila_per_image_labels.py +23 -14
  36. data_management/lila/get_lila_annotation_counts.py +16 -10
  37. data_management/lila/lila_common.py +15 -42
  38. data_management/lila/test_lila_metadata_urls.py +116 -0
  39. data_management/read_exif.py +65 -16
  40. data_management/remap_coco_categories.py +84 -0
  41. data_management/resize_coco_dataset.py +14 -31
  42. data_management/wi_download_csv_to_coco.py +239 -0
  43. data_management/yolo_output_to_md_output.py +40 -13
  44. data_management/yolo_to_coco.py +313 -100
  45. detection/process_video.py +36 -14
  46. detection/pytorch_detector.py +1 -1
  47. detection/run_detector.py +73 -18
  48. detection/run_detector_batch.py +116 -27
  49. detection/run_inference_with_yolov5_val.py +135 -27
  50. detection/run_tiled_inference.py +153 -43
  51. detection/tf_detector.py +2 -1
  52. detection/video_utils.py +4 -2
  53. md_utils/ct_utils.py +101 -6
  54. md_utils/md_tests.py +264 -17
  55. md_utils/path_utils.py +326 -47
  56. md_utils/process_utils.py +26 -7
  57. md_utils/split_locations_into_train_val.py +215 -0
  58. md_utils/string_utils.py +10 -0
  59. md_utils/url_utils.py +66 -3
  60. md_utils/write_html_image_list.py +12 -2
  61. md_visualization/visualization_utils.py +380 -74
  62. md_visualization/visualize_db.py +41 -10
  63. md_visualization/visualize_detector_output.py +185 -104
  64. {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/METADATA +11 -13
  65. {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/RECORD +74 -67
  66. {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/WHEEL +1 -1
  67. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +1 -1
  68. taxonomy_mapping/map_new_lila_datasets.py +43 -39
  69. taxonomy_mapping/prepare_lila_taxonomy_release.py +5 -2
  70. taxonomy_mapping/preview_lila_taxonomy.py +27 -27
  71. taxonomy_mapping/species_lookup.py +33 -13
  72. taxonomy_mapping/taxonomy_csv_checker.py +7 -5
  73. md_visualization/visualize_megadb.py +0 -183
  74. {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/LICENSE +0 -0
  75. {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/top_level.txt +0 -0

data_management/databases/integrity_check_json_db.py
@@ -24,9 +24,9 @@ import sys
 
 from multiprocessing.pool import ThreadPool
 from operator import itemgetter
-from PIL import Image
 from tqdm import tqdm
 
+from md_visualization.visualization_utils import open_image
 from md_utils import ct_utils
 
 
@@ -41,6 +41,8 @@ class IntegrityCheckOptions:
     bRequireLocation = True
     iMaxNumImages = -1
     nThreads = 10
+    verbose = True
+
 
 # This is used in a medium-hacky way to share modified options across threads
 defaultOptions = IntegrityCheckOptions()
@@ -65,7 +67,9 @@ def check_image_existence_and_size(image,options=None):
         print('Missing image size in {}'.format(filePath))
         return False
 
-    width, height = Image.open(filePath).size
+    # width, height = Image.open(filePath).size
+    pil_im = open_image(filePath)
+    width,height = pil_im.size
     if (not (width == image['width'] and height == image['height'])):
         print('Size mismatch for image {}: {} (reported {},{}, actual {},{})'.format(
             image['id'], filePath, image['width'], image['height'], width, height))
@@ -86,9 +90,13 @@ def integrity_check_json_db(jsonFile, options=None):
 
     if options.bCheckImageSizes:
         options.bCheckImageExistence = True
-
-    print(options.__dict__)
+
+    if options.verbose:
+        print(options.__dict__)
 
+    if options.baseDir is None:
+        options.baseDir = ''
+
     baseDir = options.baseDir
 
 
@@ -102,8 +110,9 @@ def integrity_check_json_db(jsonFile, options=None):
 
     assert os.path.isfile(jsonFile), '.json file {} does not exist'.format(jsonFile)
 
-    print('Reading .json {} with base dir [{}]...'.format(
-        jsonFile,baseDir))
+    if options.verbose:
+        print('Reading .json {} with base dir [{}]...'.format(
+            jsonFile,baseDir))
 
     with open(jsonFile,'r') as f:
         data = json.load(f)
@@ -116,10 +125,9 @@ def integrity_check_json_db(jsonFile, options=None):
     annotations = data['annotations']
     categories = data['categories']
     # info = data['info']
-    assert 'info' in data
+    assert 'info' in data, 'No info struct in database'
 
-    if len(baseDir) > 0:
-
+    if len(baseDir) > 0:
         assert os.path.isdir(baseDir), 'Base directory {} does not exist'.format(baseDir)
 
 
@@ -131,7 +139,8 @@ def integrity_check_json_db(jsonFile, options=None):
     catNameToCat = {}
     imageLocationSet = set()
 
-    print('Checking categories...')
+    if options.verbose:
+        print('Checking categories...')
 
     for cat in tqdm(categories):
 
@@ -139,8 +148,8 @@ def integrity_check_json_db(jsonFile, options=None):
         assert 'name' in cat
         assert 'id' in cat
 
-        assert isinstance(cat['id'],int), 'Illegal category ID type'
-        assert isinstance(cat['name'],str), 'Illegal category name type'
+        assert isinstance(cat['id'],int), 'Illegal category ID type: [{}]'.format(str(cat['id']))
+        assert isinstance(cat['name'],str), 'Illegal category name type [{}]'.format(str(cat['name']))
 
         catId = cat['id']
         catName = cat['name']
@@ -155,11 +164,13 @@ def integrity_check_json_db(jsonFile, options=None):
 
     # ...for each category
 
-    print('\nChecking images...')
+    if options.verbose:
+        print('\nChecking images...')
 
     if options.iMaxNumImages > 0 and len(images) > options.iMaxNumImages:
 
-        print('Trimming image list to {}'.format(options.iMaxNumImages))
+        if options.verbose:
+            print('Trimming image list to {}'.format(options.iMaxNumImages))
         images = images[0:options.iMaxNumImages]
 
     imagePathsInJson = set()
@@ -215,7 +226,8 @@ def integrity_check_json_db(jsonFile, options=None):
     # Are we checking for unused images?
     if (len(baseDir) > 0) and options.bFindUnusedImages:
 
-        print('\nEnumerating images...')
+        if options.verbose:
+            print('\nEnumerating images...')
 
         # Recursively enumerate images
         imagePaths = []
@@ -242,8 +254,9 @@ def integrity_check_json_db(jsonFile, options=None):
 
     if len(baseDir) == 0:
         print('Warning: checking image sizes without a base directory, assuming "."')
-
-    print('Checking image existence and/or image sizes...')
+
+    if options.verbose:
+        print('Checking image existence and/or image sizes...')
 
     if options.nThreads is not None and options.nThreads > 1:
         pool = ThreadPool(options.nThreads)
@@ -263,9 +276,9 @@ def integrity_check_json_db(jsonFile, options=None):
 
     # ...for each image
 
-    print('{} validation errors (of {})'.format(len(validationErrors),len(images)))
-
-    print('Checking annotations...')
+    if options.verbose:
+        print('{} validation errors (of {})'.format(len(validationErrors),len(images)))
+        print('Checking annotations...')
 
     nBoxes = 0
 
@@ -300,58 +313,56 @@
         catIdToCat[ann['category_id']]['_count'] +=1
 
     # ...for each annotation
-
-
-    ##%% Print statistics
 
-    # Find un-annotated images and multi-annotation images
-    nUnannotated = 0
-    nMultiAnnotated = 0
+    sortedCategories = sorted(categories, key=itemgetter('_count'), reverse=True)
 
-    for image in images:
-        if image['_count'] == 0:
-            nUnannotated += 1
-        elif image['_count'] > 1:
-            nMultiAnnotated += 1
-
-    print('Found {} unannotated images, {} images with multiple annotations'.format(
-        nUnannotated,nMultiAnnotated))
 
-    if (len(baseDir) > 0) and options.bFindUnusedImages:
-        print('Found {} unused image files'.format(len(unusedFiles)))
-
-    nUnusedCategories = 0
+    ##%% Print statistics
 
-    # Find unused categories
-    for cat in categories:
-        if cat['_count'] == 0:
-            print('Unused category: {}'.format(cat['name']))
-            nUnusedCategories += 1
+    if options.verbose:
 
-    print('Found {} unused categories'.format(nUnusedCategories))
+        # Find un-annotated images and multi-annotation images
+        nUnannotated = 0
+        nMultiAnnotated = 0
+
+        for image in images:
+            if image['_count'] == 0:
+                nUnannotated += 1
+            elif image['_count'] > 1:
+                nMultiAnnotated += 1
+
+        print('Found {} unannotated images, {} images with multiple annotations'.format(
+            nUnannotated,nMultiAnnotated))
+
+        if (len(baseDir) > 0) and options.bFindUnusedImages:
+            print('Found {} unused image files'.format(len(unusedFiles)))
 
-    sequenceString = 'no sequence info'
-    if len(sequences) > 0:
-        sequenceString = '{} sequences'.format(len(sequences))
+        nUnusedCategories = 0
 
-    print('\nDB contains {} images, {} annotations, {} bboxes, {} categories, {}\n'.format(
-        len(images),len(annotations),nBoxes,len(categories),sequenceString))
-
-    if len(imageLocationSet) > 0:
-        print('DB contains images from {} locations\n'.format(len(imageLocationSet)))
-
-    # Prints a list of categories sorted by count
-    #
-    # https://stackoverflow.com/questions/72899/how-do-i-sort-a-list-of-dictionaries-by-a-value-of-the-dictionary
-
-    sortedCategories = sorted(categories, key=itemgetter('_count'), reverse=True)
-
-    print('Categories and annotation (not image) counts:\n')
-
-    for cat in sortedCategories:
-        print('{:6} {}'.format(cat['_count'],cat['name']))
+        # Find unused categories
+        for cat in categories:
+            if cat['_count'] == 0:
+                print('Unused category: {}'.format(cat['name']))
+                nUnusedCategories += 1
+
+        print('Found {} unused categories'.format(nUnusedCategories))
+
+        sequenceString = 'no sequence info'
+        if len(sequences) > 0:
+            sequenceString = '{} sequences'.format(len(sequences))
+
+        print('\nDB contains {} images, {} annotations, {} bboxes, {} categories, {}\n'.format(
+            len(images),len(annotations),nBoxes,len(categories),sequenceString))
 
-    print('')
+        if len(imageLocationSet) > 0:
+            print('DB contains images from {} locations\n'.format(len(imageLocationSet)))
+
+        print('Categories and annotation (not image) counts:\n')
+
+        for cat in sortedCategories:
+            print('{:6} {}'.format(cat['_count'],cat['name']))
+
+        print('')
 
     errorInfo = {}
     errorInfo['unusedFiles'] = unusedFiles
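
Net effect of the integrity_check_json_db.py changes: a new verbose flag on
IntegrityCheckOptions gates all progress and statistics output, image sizes are
read via md_visualization's open_image() rather than PIL directly, and a None
baseDir is now tolerated. A minimal usage sketch of the quieter call path; the
.json path and base folder below are hypothetical:

    from data_management.databases.integrity_check_json_db import (
        IntegrityCheckOptions, integrity_check_json_db)

    options = IntegrityCheckOptions()
    options.baseDir = '/data/camera-traps'  # hypothetical base folder
    options.bCheckImageSizes = True         # also forces bCheckImageExistence
    options.verbose = False                 # new in 5.0.8: suppress progress/statistics output
    integrity_check_json_db('dataset.json', options)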

data_management/databases/subset_json_db.py
@@ -65,7 +65,7 @@ def subset_json_db(input_json, query, output_json=None, ignore_case=False):
     # Write the output file if requested
     if output_json is not None:
         print('Writing output .json...')
-        json.dump(output_data,open(output_json,'w'),indent=4)
+        json.dump(output_data,open(output_json,'w'),indent=1)
 
     return output_data
 
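
For context, subset_json_db() filters a .json database down to the records
matching a query string; the change above only makes the output more compact
(indent=1 instead of indent=4). A usage sketch; the filenames are hypothetical,
and the substring-matching behavior of [query] is an assumption rather than
something shown in this diff:

    from data_management.databases.subset_json_db import subset_json_db

    # Keep only entries matching 'site-a', writing indent=1 output
    data = subset_json_db('all_images.json', 'site-a',
                          output_json='site_a.json', ignore_case=True)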

data_management/generate_crops_from_cct.py
@@ -164,4 +164,4 @@ if False:
     options.box_expansion = 25
 
     htmlOutputFile,db = visualize_db(db_path,output_dir,image_base_dir,options)
-
+

data_management/get_image_sizes.py
@@ -15,6 +15,8 @@ import os
 from PIL import Image
 import sys
 
+from md_utils.path_utils import find_images
+
 from multiprocessing.pool import ThreadPool
 from multiprocessing.pool import Pool
 from functools import partial
@@ -27,7 +29,11 @@ use_threads = False
 
 #%% Processing functions
 
-def process_image(image_path,image_prefix=None):
+def _get_image_size(image_path,image_prefix=None):
+    """
+    Support function to get the size of a single image. Returns a (path,w,h) tuple.
+    w and h will be -1 if the image fails to load.
+    """
 
     if image_prefix is not None:
         full_path = os.path.join(image_prefix,image_path)
@@ -49,43 +55,56 @@ def process_image(image_path,image_prefix=None):
         return (image_path,-1,-1)
 
 
-def process_images(filenames,image_prefix=None,n_threads=default_n_threads):
+def get_image_sizes(filenames,image_prefix=None,output_file=None,
+                    n_workers=default_n_threads,use_threads=True,
+                    recursive=True):
+    """
+    Get the width and height of all images in [filenames], which can be:
+
+    * A .json-formatted file
+    * A folder
+    * A list of files
+
+    ...returning a list of (path,w,h) tuples, and optionally writing the results to [output_file].
+    """
+
+    if output_file is not None:
+        assert os.path.isdir(os.path.dirname(output_file)), \
+            'Illegal output file {}, parent folder does not exist'.format(output_file)
+
+    if isinstance(filenames,str) and os.path.isfile(filenames):
+        with open(filenames,'r') as f:
+            filenames = json.load(f)
+            filenames = [s.strip() for s in filenames]
+    elif isinstance(filenames,str) and os.path.isdir(filenames):
+        filenames = find_images(filenames,recursive=recursive,
+                                return_relative_paths=False,convert_slashes=True)
+    else:
+        assert isinstance(filenames,list)
 
-    if n_threads <= 1:
+    if n_workers <= 1:
 
         all_results = []
         for i_file,fn in tqdm(enumerate(filenames),total=len(filenames)):
-            all_results.append(process_image(fn,image_prefix=image_prefix))
+            all_results.append(_get_image_size(fn,image_prefix=image_prefix))
 
     else:
 
-        print('Creating a pool with {} threads'.format(n_threads))
+        print('Creating a pool with {} workers'.format(n_workers))
         if use_threads:
-            pool = ThreadPool(n_threads)
+            pool = ThreadPool(n_workers)
         else:
-            pool = Pool(n_threads)
+            pool = Pool(n_workers)
         # all_results = list(tqdm(pool.imap(process_image, filenames), total=len(filenames)))
         all_results = list(tqdm(pool.imap(
-            partial(process_image,image_prefix=image_prefix), filenames), total=len(filenames)))
-
-    return all_results
-
-
-def process_list_file(input_file,output_file=None,image_prefix=None,n_threads=default_n_threads):
-
-    assert os.path.isdir(os.path.dirname(output_file))
-    assert os.path.isfile(input_file)
-
-    with open(input_file,'r') as f:
-        filenames = json.load(f)
-        filenames = [s.strip() for s in filenames]
-
-    all_results = process_images(filenames,image_prefix=image_prefix,n_threads=n_threads)
+            partial(_get_image_size,image_prefix=image_prefix), filenames), total=len(filenames)))
 
     if output_file is not None:
        with open(output_file,'w') as f:
            json.dump(all_results,f,indent=1)
-
+
+    return all_results
+
 
 #%% Interactive driver
 
@@ -116,8 +135,7 @@ if False:
 
     #%%
 
-    # process_list_file(image_list_file,image_size_file,image_prefix=base_dir)
-    process_list_file(relative_image_list_file,image_size_file,image_prefix=base_dir,n_threads=4)
+    get_image_sizes(relative_image_list_file,image_size_file,image_prefix=base_dir,n_threads=4)
 
 
 #%% Command-line driver
@@ -136,7 +154,7 @@ def main():
 
     args = parser.parse_args()
 
-    process_list_file(args.input_file,args.output_file,args.image_prefix,args.n_threads)
+    _ = get_image_sizes(args.input_file,args.output_file,args.image_prefix,args.n_threads)
 
 
 if __name__ == '__main__':
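
Net effect of the get_image_sizes.py changes: process_image() becomes the
private helper _get_image_size(), and process_images()/process_list_file() are
merged into a single get_image_sizes() entry point that accepts a folder, a
.json-formatted file list, or a Python list, per the new docstring. A usage
sketch based on that signature; the folder and output paths are hypothetical,
and note that the keyword for the worker count in the new signature is
n_workers:

    from data_management.get_image_sizes import get_image_sizes

    # Size every image under a folder, eight threads at a time
    results = get_image_sizes('/data/images', output_file='sizes.json',
                              n_workers=8, use_threads=True)
    for path, w, h in results:
        if w == -1:
            print('Failed to read {}'.format(path))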

data_management/importers/animl_results_to_md_results.py
@@ -39,8 +39,6 @@ def animl_results_to_md_results(input_file,output_file=None):
     If [output_file] is None, '.json' will be appended to the input file.
     """
 
-    #%%
-
     if output_file is None:
         output_file = input_file + '.json'
 
@@ -111,7 +109,7 @@
     with open(output_file,'w') as f:
         json.dump(results,f,indent=1)
 
-# ...zamba_results_to_md_results(...)
+# ...animl_results_to_md_results(...)
 
 
 #%% Interactive driver
@@ -145,7 +143,7 @@ def main():
         '--output_file',
         type=str,
         default=None,
-        help='output .json file (defaults to input file appened with ".json")')
+        help='output .json file (defaults to input file appended with ".json")')
 
     if len(sys.argv[1:]) == 0:
         parser.print_help()
@@ -157,4 +155,4 @@ def main():
 
 if __name__ == '__main__':
     main()
-
+
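
For context, animl_results_to_md_results() converts an Animl results file to
MegaDetector output format; per the docstring above, passing output_file=None
appends '.json' to the input path. A minimal sketch; the input filename is
hypothetical, and the .csv extension is an assumption about the Animl results
format rather than something shown in this diff:

    from data_management.importers.animl_results_to_md_results import \
        animl_results_to_md_results

    # With output_file=None, this writes animl_results.csv.json
    animl_results_to_md_results('animl_results.csv')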

data_management/importers/noaa_seals_2019.py
@@ -2,7 +2,7 @@
 #
 # noaa_seals_2019.py
 #
-# Prepare the NOAA Artic Seals 2019 metadata for LILA.
+# Prepare the NOAA Arctic Seals 2019 metadata for LILA.
 #
 ########
 
@@ -165,7 +165,7 @@ for b in ir_boxes:
         use_normalized_coordinates=False,
         thickness=3)
 
-visualization_utils.show_images_in_a_row([img_rgb,img_ir])
+# visualization_utils.show_images_in_a_row([img_rgb,img_ir])
 
 
 #%% Save images

data_management/importers/zamba_results_to_md_results.py
@@ -166,7 +166,7 @@ def main():
         '--output_file',
         type=str,
         default=None,
-        help='output .json file (defaults to input file appened with ".json")')
+        help='output .json file (defaults to input file appended with ".json")')
 
     if len(sys.argv[1:]) == 0:
         parser.print_help()
@@ -178,4 +178,4 @@ def main():
 
 if __name__ == '__main__':
     main()
-
+