megadetector 5.0.6__py3-none-any.whl → 5.0.7__py3-none-any.whl

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (62)
  1. api/batch_processing/data_preparation/manage_local_batch.py +278 -197
  2. api/batch_processing/data_preparation/manage_video_batch.py +7 -2
  3. api/batch_processing/postprocessing/add_max_conf.py +1 -0
  4. api/batch_processing/postprocessing/compare_batch_results.py +110 -60
  5. api/batch_processing/postprocessing/load_api_results.py +55 -69
  6. api/batch_processing/postprocessing/md_to_labelme.py +1 -0
  7. api/batch_processing/postprocessing/postprocess_batch_results.py +158 -50
  8. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +625 -0
  9. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +71 -23
  10. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +1 -1
  11. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +222 -74
  12. api/batch_processing/postprocessing/subset_json_detector_output.py +132 -5
  13. api/batch_processing/postprocessing/top_folders_to_bottom.py +1 -1
  14. classification/prepare_classification_script.py +191 -191
  15. data_management/coco_to_yolo.py +65 -44
  16. data_management/databases/integrity_check_json_db.py +7 -5
  17. data_management/generate_crops_from_cct.py +1 -1
  18. data_management/importers/animl_results_to_md_results.py +2 -2
  19. data_management/importers/noaa_seals_2019.py +1 -1
  20. data_management/importers/zamba_results_to_md_results.py +2 -2
  21. data_management/labelme_to_coco.py +34 -6
  22. data_management/labelme_to_yolo.py +1 -1
  23. data_management/lila/create_lila_blank_set.py +474 -0
  24. data_management/lila/create_lila_test_set.py +2 -1
  25. data_management/lila/create_links_to_md_results_files.py +1 -1
  26. data_management/lila/download_lila_subset.py +46 -21
  27. data_management/lila/generate_lila_per_image_labels.py +23 -14
  28. data_management/lila/get_lila_annotation_counts.py +16 -10
  29. data_management/lila/lila_common.py +14 -11
  30. data_management/lila/test_lila_metadata_urls.py +116 -0
  31. data_management/resize_coco_dataset.py +12 -10
  32. data_management/yolo_output_to_md_output.py +40 -13
  33. data_management/yolo_to_coco.py +34 -21
  34. detection/process_video.py +36 -14
  35. detection/pytorch_detector.py +1 -1
  36. detection/run_detector.py +73 -18
  37. detection/run_detector_batch.py +104 -24
  38. detection/run_inference_with_yolov5_val.py +127 -26
  39. detection/run_tiled_inference.py +153 -43
  40. detection/video_utils.py +3 -1
  41. md_utils/ct_utils.py +79 -3
  42. md_utils/md_tests.py +253 -15
  43. md_utils/path_utils.py +129 -24
  44. md_utils/process_utils.py +26 -7
  45. md_utils/split_locations_into_train_val.py +215 -0
  46. md_utils/string_utils.py +10 -0
  47. md_utils/url_utils.py +0 -2
  48. md_utils/write_html_image_list.py +1 -0
  49. md_visualization/visualization_utils.py +17 -2
  50. md_visualization/visualize_db.py +8 -0
  51. md_visualization/visualize_detector_output.py +185 -104
  52. {megadetector-5.0.6.dist-info → megadetector-5.0.7.dist-info}/METADATA +2 -2
  53. {megadetector-5.0.6.dist-info → megadetector-5.0.7.dist-info}/RECORD +62 -58
  54. {megadetector-5.0.6.dist-info → megadetector-5.0.7.dist-info}/WHEEL +1 -1
  55. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +1 -1
  56. taxonomy_mapping/map_new_lila_datasets.py +43 -39
  57. taxonomy_mapping/prepare_lila_taxonomy_release.py +5 -2
  58. taxonomy_mapping/preview_lila_taxonomy.py +27 -27
  59. taxonomy_mapping/species_lookup.py +33 -13
  60. taxonomy_mapping/taxonomy_csv_checker.py +7 -5
  61. {megadetector-5.0.6.dist-info → megadetector-5.0.7.dist-info}/LICENSE +0 -0
  62. {megadetector-5.0.6.dist-info → megadetector-5.0.7.dist-info}/top_level.txt +0 -0
@@ -89,10 +89,12 @@ def coco_to_yolo(input_image_folder,output_folder,input_file,
89
89
  images_to_exclude=None,
90
90
  path_replacement_char='#',
91
91
  category_names_to_exclude=None,
92
- write_output=True):
92
+ category_names_to_include=None,
93
+ write_output=True,
94
+ flatten_paths=True):
93
95
  """
94
- Convert a COCO-formatted dataset to a YOLO-formatted dataset, flattening the dataset
95
- (to a single folder) in the process.
96
+ Convert a COCO-formatted dataset to a YOLO-formatted dataset, optionally flattening the
97
+ dataset to a single folder in the process.
96
98
 
97
99
  If the input and output folders are the same, writes .txt files to the input folder,
98
100
  and neither moves nor modifies images.
@@ -130,6 +132,9 @@ def coco_to_yolo(input_image_folder,output_folder,input_file,
130
132
 
131
133
  ## Validate input
132
134
 
135
+ if category_names_to_include is not None and category_names_to_exclude is not None:
136
+ raise ValueError('category_names_to_include and category_names_to_exclude are mutually exclusive')
137
+
133
138
  if output_folder is None:
134
139
  output_folder = input_image_folder
135
140
 
@@ -138,12 +143,16 @@ def coco_to_yolo(input_image_folder,output_folder,input_file,
138
143
 
139
144
  if category_names_to_exclude is None:
140
145
  category_names_to_exclude = {}
141
-
146
+
142
147
  assert os.path.isdir(input_image_folder)
143
148
  assert os.path.isfile(input_file)
144
149
  os.makedirs(output_folder,exist_ok=True)
145
150
 
146
-
151
+ if (output_folder == input_image_folder) and (overwrite_images) and \
152
+ (not create_image_and_label_folders) and (not flatten_paths):
153
+ print('Warning: output folder and input folder are the same, disabling overwrite_images')
154
+ overwrite_images = False
155
+
147
156
  ## Read input data
148
157
 
149
158
  with open(input_file,'r') as f:
@@ -184,7 +193,11 @@ def coco_to_yolo(input_image_folder,output_folder,input_file,
184
193
 
185
194
  for category in data['categories']:
186
195
  coco_id_to_name[category['id']] = category['name']
187
- if (category['name'] in category_names_to_exclude):
196
+ if (category_names_to_include is not None) and \
197
+ (category['name'] not in category_names_to_include):
198
+ coco_category_ids_to_exclude.add(category['id'])
199
+ continue
200
+ elif (category['name'] in category_names_to_exclude):
188
201
  coco_category_ids_to_exclude.add(category['id'])
189
202
  continue
190
203
  assert category['id'] not in coco_id_to_yolo_id
@@ -228,9 +241,13 @@ def coco_to_yolo(input_image_folder,output_folder,input_file,
228
241
  tokens = os.path.splitext(im['file_name'])
229
242
  if tokens[1].lower() not in typical_image_extensions:
230
243
  print('Warning: unusual image file name {}'.format(im['file_name']))
231
-
232
- image_name = tokens[0].replace('\\','/').replace('/',path_replacement_char) + \
233
- '_' + str(i_image).zfill(6)
244
+
245
+ if flatten_paths:
246
+ image_name = tokens[0].replace('\\','/').replace('/',path_replacement_char) + \
247
+ '_' + str(i_image).zfill(6)
248
+ else:
249
+ image_name = tokens[0]
250
+
234
251
  assert image_name not in image_names, 'Image name collision for {}'.format(image_name)
235
252
  image_names.add(image_name)
236
253
 
@@ -293,12 +310,6 @@ def coco_to_yolo(input_image_folder,output_folder,input_file,
293
310
  # This category isn't in our category list. This typically corresponds to whole sets
294
311
  # of images that were excluded from the YOLO set.
295
312
  if ann['category_id'] in coco_category_ids_to_exclude:
296
- category_name = coco_id_to_name[ann['category_id']]
297
- if category_name not in category_exclusion_warnings_printed:
298
- category_exclusion_warnings_printed.add(category_name)
299
- print('Warning: ignoring category {} in image {}'.format(
300
- category_name,image_id),end='')
301
- print('...are you sure you didn\'t mean to exclude this image?')
302
313
  continue
303
314
 
304
315
  yolo_category_id = coco_id_to_yolo_id[ann['category_id']]
@@ -407,30 +418,38 @@ def coco_to_yolo(input_image_folder,output_folder,input_file,
407
418
  with open(image_id_to_output_image_json_file,'w') as f:
408
419
  json.dump(image_id_to_output_image_name,f,indent=1)
409
420
 
410
- if (write_output):
411
-
421
+
422
+ if (output_folder == input_image_folder) and (not create_image_and_label_folders):
423
+ print('Creating annotation files (not copying images, input and output folder are the same)')
424
+ else:
412
425
  print('Copying images and creating annotation files')
426
+
427
+ if create_image_and_label_folders:
428
+ dest_image_folder = os.path.join(output_folder,'images')
429
+ dest_txt_folder = os.path.join(output_folder,'labels')
430
+ else:
431
+ dest_image_folder = output_folder
432
+ dest_txt_folder = output_folder
433
+
434
+ source_image_to_dest_image = {}
413
435
 
414
- if create_image_and_label_folders:
415
- dest_image_folder = os.path.join(output_folder,'images')
416
- dest_txt_folder = os.path.join(output_folder,'labels')
417
- else:
418
- dest_image_folder = output_folder
419
- dest_txt_folder = output_folder
420
-
421
- # TODO: parallelize this loop
422
- #
423
- # output_info = images_to_copy[0]
424
- for output_info in tqdm(images_to_copy):
425
-
426
- source_image = output_info['source_image']
427
- dest_image_relative = output_info['dest_image_relative']
428
- dest_txt_relative = output_info['dest_txt_relative']
429
-
430
- dest_image = os.path.join(dest_image_folder,dest_image_relative)
431
- os.makedirs(os.path.dirname(dest_image),exist_ok=True)
436
+ # TODO: parallelize this loop
437
+ #
438
+ # output_info = images_to_copy[0]
439
+ for output_info in tqdm(images_to_copy):
440
+
441
+ source_image = output_info['source_image']
442
+ dest_image_relative = output_info['dest_image_relative']
443
+ dest_txt_relative = output_info['dest_txt_relative']
444
+
445
+ dest_image = os.path.join(dest_image_folder,dest_image_relative)
446
+ dest_txt = os.path.join(dest_txt_folder,dest_txt_relative)
447
+
448
+ source_image_to_dest_image[source_image] = dest_image
449
+
450
+ if write_output:
432
451
 
433
- dest_txt = os.path.join(dest_txt_folder,dest_txt_relative)
452
+ os.makedirs(os.path.dirname(dest_image),exist_ok=True)
434
453
  os.makedirs(os.path.dirname(dest_txt),exist_ok=True)
435
454
 
436
455
  if not create_image_and_label_folders:
@@ -438,7 +457,7 @@ def coco_to_yolo(input_image_folder,output_folder,input_file,
438
457
 
439
458
  if (not os.path.isfile(dest_image)) or (overwrite_images):
440
459
  shutil.copyfile(source_image,dest_image)
441
-
460
+
442
461
  bboxes = output_info['bboxes']
443
462
 
444
463
  # Only write an annotation file if there are bounding boxes. Images with
@@ -458,15 +477,17 @@ def coco_to_yolo(input_image_folder,output_folder,input_file,
458
477
  assert len(bbox) == 5
459
478
  s = '{} {} {} {} {}'.format(bbox[0],bbox[1],bbox[2],bbox[3],bbox[4])
460
479
  f.write(s + '\n')
461
-
462
- # ...for each image
480
+
481
+ # ...if we're actually writing output
463
482
 
464
- # ...if we're actually writing output
465
-
466
- return_info = {}
467
- return_info['class_list_filename'] = class_list_filename
483
+ # ...for each image
484
+
485
+ coco_to_yolo_info = {}
486
+ coco_to_yolo_info['class_list_filename'] = class_list_filename
487
+ coco_to_yolo_info['source_image_to_dest_image'] = source_image_to_dest_image
488
+ coco_to_yolo_info['coco_id_to_yolo_id'] = coco_id_to_yolo_id
468
489
 
469
- return return_info
490
+ return coco_to_yolo_info
470
491
 
471
492
  # ...def coco_to_yolo(...)
472
493
 
@@ -89,6 +89,9 @@ def integrity_check_json_db(jsonFile, options=None):
89
89
 
90
90
  print(options.__dict__)
91
91
 
92
+ if options.baseDir is None:
93
+ options.baseDir = ''
94
+
92
95
  baseDir = options.baseDir
93
96
 
94
97
 
@@ -116,10 +119,9 @@ def integrity_check_json_db(jsonFile, options=None):
116
119
  annotations = data['annotations']
117
120
  categories = data['categories']
118
121
  # info = data['info']
119
- assert 'info' in data
122
+ assert 'info' in data, 'No info struct in database'
120
123
 
121
- if len(baseDir) > 0:
122
-
124
+ if len(baseDir) > 0:
123
125
  assert os.path.isdir(baseDir), 'Base directory {} does not exist'.format(baseDir)
124
126
 
125
127
 
@@ -139,8 +141,8 @@ def integrity_check_json_db(jsonFile, options=None):
139
141
  assert 'name' in cat
140
142
  assert 'id' in cat
141
143
 
142
- assert isinstance(cat['id'],int), 'Illegal category ID type'
143
- assert isinstance(cat['name'],str), 'Illegal category name type'
144
+ assert isinstance(cat['id'],int), 'Illegal category ID type: [{}]'.format(str(cat['id']))
145
+ assert isinstance(cat['name'],str), 'Illegal category name type [{}]'.format(str(cat['name']))
144
146
 
145
147
  catId = cat['id']
146
148
  catName = cat['name']
@@ -164,4 +164,4 @@ if False:
164
164
  options.box_expansion = 25
165
165
 
166
166
  htmlOutputFile,db = visualize_db(db_path,output_dir,image_base_dir,options)
167
-
167
+
@@ -145,7 +145,7 @@ def main():
145
145
  '--output_file',
146
146
  type=str,
147
147
  default=None,
148
- help='output .json file (defaults to input file appened with ".json")')
148
+ help='output .json file (defaults to input file appended with ".json")')
149
149
 
150
150
  if len(sys.argv[1:]) == 0:
151
151
  parser.print_help()
@@ -157,4 +157,4 @@ def main():
157
157
 
158
158
  if __name__ == '__main__':
159
159
  main()
160
-
160
+
@@ -2,7 +2,7 @@
2
2
  #
3
3
  # noaa_seals_2019.py
4
4
  #
5
- # Prepare the NOAA Artic Seals 2019 metadata for LILA.
5
+ # Prepare the NOAA Arctic Seals 2019 metadata for LILA.
6
6
  #
7
7
  ########
8
8
 
@@ -166,7 +166,7 @@ def main():
166
166
  '--output_file',
167
167
  type=str,
168
168
  default=None,
169
- help='output .json file (defaults to input file appened with ".json")')
169
+ help='output .json file (defaults to input file appended with ".json")')
170
170
 
171
171
  if len(sys.argv[1:]) == 0:
172
172
  parser.print_help()
@@ -178,4 +178,4 @@ def main():
178
178
 
179
179
  if __name__ == '__main__':
180
180
  main()
181
-
181
+
@@ -20,11 +20,19 @@ from tqdm import tqdm
20
20
 
21
21
  #%% Functions
22
22
 
23
- def labelme_to_coco(input_folder,output_file=None,category_id_to_category_name=None,
24
- empty_category_name='empty',empty_category_id=None,info_struct=None,
25
- relative_paths_to_include=None,relative_paths_to_exclude=None,
26
- use_folders_as_labels=False,recursive=True,no_json_handling='skip',
27
- validate_image_sizes=True):
23
+ def labelme_to_coco(input_folder,
24
+ output_file=None,
25
+ category_id_to_category_name=None,
26
+ empty_category_name='empty',
27
+ empty_category_id=None,
28
+ info_struct=None,
29
+ relative_paths_to_include=None,
30
+ relative_paths_to_exclude=None,
31
+ use_folders_as_labels=False,
32
+ recursive=True,
33
+ no_json_handling='skip',
34
+ validate_image_sizes=True,
35
+ right_edge_quantization_threshold=None):
28
36
  """
29
37
  Find all images in [input_folder] that have corresponding .json files, and convert
30
38
  to a COCO .json file.
@@ -48,7 +56,13 @@ def labelme_to_coco(input_folder,output_file=None,category_id_to_category_name=N
48
56
 
49
57
  * 'skip': ignore image files with no corresponding .json files
50
58
  * 'empty': treat image files with no corresponding .json files as empty
51
- * 'error': throw an error when an image file has no corresponding .json file
59
+ * 'error': throw an error when an image file has no corresponding .json file
60
+
61
+ right_edge_quantization_threshold is an off-by-default hack to handle cases where
62
+ boxes that really should be running off the right side of the image only extend like 99%
63
+ of the way there, due to what appears to be a slight bias inherent to MD. If a box extends
64
+ within [right_edge_quantization_threshold] (a small number, from 0 to 1, but probably around
65
+ 0.02) of the right edge of the image, it will be extended to the far right edge.
52
66
  """
53
67
 
54
68
  if category_id_to_category_name is None:
@@ -86,6 +100,8 @@ def labelme_to_coco(input_folder,output_file=None,category_id_to_category_name=N
86
100
  images = []
87
101
  annotations = []
88
102
 
103
+ n_edges_quantized = 0
104
+
89
105
  # image_fn_relative = image_filenames_relative[0]
90
106
  for image_fn_relative in tqdm(image_filenames_relative):
91
107
 
@@ -182,6 +198,14 @@ def labelme_to_coco(input_folder,output_file=None,category_id_to_category_name=N
182
198
  x1 = max(p0[0],p1[0])
183
199
  y0 = min(p0[1],p1[1])
184
200
  y1 = max(p0[1],p1[1])
201
+
202
+ if right_edge_quantization_threshold is not None:
203
+ x1_rel = x1 / (im['width'] - 1)
204
+ right_edge_distance = 1.0 - x1_rel
205
+ if right_edge_distance < right_edge_quantization_threshold:
206
+ n_edges_quantized += 1
207
+ x1 = im['width'] - 1
208
+
185
209
  bbox = [x0,y0,abs(x1-x0),abs(y1-y0)]
186
210
  ann = {}
187
211
  ann['id'] = str(uuid.uuid1())
@@ -197,6 +221,10 @@ def labelme_to_coco(input_folder,output_file=None,category_id_to_category_name=N
197
221
 
198
222
  # ..for each image
199
223
 
224
+ if n_edges_quantized > 0:
225
+ print('Quantized the right edge in {} of {} images'.format(
226
+ n_edges_quantized,len(image_filenames_relative)))
227
+
200
228
  output_dict = {}
201
229
  output_dict['images'] = images
202
230
  output_dict['annotations'] = annotations
@@ -226,4 +226,4 @@ if False:
226
226
 
227
227
  #%% Command-line driver
228
228
 
229
- # TODO
229
+ # TODO