megadetector 5.0.6__py3-none-any.whl → 5.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (75) hide show
  1. api/batch_processing/data_preparation/manage_local_batch.py +297 -202
  2. api/batch_processing/data_preparation/manage_video_batch.py +7 -2
  3. api/batch_processing/postprocessing/add_max_conf.py +1 -0
  4. api/batch_processing/postprocessing/combine_api_outputs.py +2 -2
  5. api/batch_processing/postprocessing/compare_batch_results.py +111 -61
  6. api/batch_processing/postprocessing/convert_output_format.py +24 -6
  7. api/batch_processing/postprocessing/load_api_results.py +56 -72
  8. api/batch_processing/postprocessing/md_to_labelme.py +119 -51
  9. api/batch_processing/postprocessing/merge_detections.py +30 -5
  10. api/batch_processing/postprocessing/postprocess_batch_results.py +175 -55
  11. api/batch_processing/postprocessing/remap_detection_categories.py +163 -0
  12. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +628 -0
  13. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +71 -23
  14. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +1 -1
  15. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +224 -76
  16. api/batch_processing/postprocessing/subset_json_detector_output.py +132 -5
  17. api/batch_processing/postprocessing/top_folders_to_bottom.py +1 -1
  18. classification/prepare_classification_script.py +191 -191
  19. data_management/cct_json_utils.py +7 -2
  20. data_management/coco_to_labelme.py +263 -0
  21. data_management/coco_to_yolo.py +72 -48
  22. data_management/databases/integrity_check_json_db.py +75 -64
  23. data_management/databases/subset_json_db.py +1 -1
  24. data_management/generate_crops_from_cct.py +1 -1
  25. data_management/get_image_sizes.py +44 -26
  26. data_management/importers/animl_results_to_md_results.py +3 -5
  27. data_management/importers/noaa_seals_2019.py +2 -2
  28. data_management/importers/zamba_results_to_md_results.py +2 -2
  29. data_management/labelme_to_coco.py +264 -127
  30. data_management/labelme_to_yolo.py +96 -53
  31. data_management/lila/create_lila_blank_set.py +557 -0
  32. data_management/lila/create_lila_test_set.py +2 -1
  33. data_management/lila/create_links_to_md_results_files.py +1 -1
  34. data_management/lila/download_lila_subset.py +138 -45
  35. data_management/lila/generate_lila_per_image_labels.py +23 -14
  36. data_management/lila/get_lila_annotation_counts.py +16 -10
  37. data_management/lila/lila_common.py +15 -42
  38. data_management/lila/test_lila_metadata_urls.py +116 -0
  39. data_management/read_exif.py +65 -16
  40. data_management/remap_coco_categories.py +84 -0
  41. data_management/resize_coco_dataset.py +14 -31
  42. data_management/wi_download_csv_to_coco.py +239 -0
  43. data_management/yolo_output_to_md_output.py +40 -13
  44. data_management/yolo_to_coco.py +313 -100
  45. detection/process_video.py +36 -14
  46. detection/pytorch_detector.py +1 -1
  47. detection/run_detector.py +73 -18
  48. detection/run_detector_batch.py +116 -27
  49. detection/run_inference_with_yolov5_val.py +135 -27
  50. detection/run_tiled_inference.py +153 -43
  51. detection/tf_detector.py +2 -1
  52. detection/video_utils.py +4 -2
  53. md_utils/ct_utils.py +101 -6
  54. md_utils/md_tests.py +264 -17
  55. md_utils/path_utils.py +326 -47
  56. md_utils/process_utils.py +26 -7
  57. md_utils/split_locations_into_train_val.py +215 -0
  58. md_utils/string_utils.py +10 -0
  59. md_utils/url_utils.py +66 -3
  60. md_utils/write_html_image_list.py +12 -2
  61. md_visualization/visualization_utils.py +380 -74
  62. md_visualization/visualize_db.py +41 -10
  63. md_visualization/visualize_detector_output.py +185 -104
  64. {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/METADATA +11 -13
  65. {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/RECORD +74 -67
  66. {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/WHEEL +1 -1
  67. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +1 -1
  68. taxonomy_mapping/map_new_lila_datasets.py +43 -39
  69. taxonomy_mapping/prepare_lila_taxonomy_release.py +5 -2
  70. taxonomy_mapping/preview_lila_taxonomy.py +27 -27
  71. taxonomy_mapping/species_lookup.py +33 -13
  72. taxonomy_mapping/taxonomy_csv_checker.py +7 -5
  73. md_visualization/visualize_megadb.py +0 -183
  74. {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/LICENSE +0 -0
  75. {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,263 @@
1
+ ########
2
+ #
3
+ # coco_to_labelme.py
4
+ #
5
+ # Converts a COCO dataset to labelme format (one .json per image file).
6
+ #
7
+ # If you want to convert YOLO data to labelme, use yolo_to_coco, then coco_to_labelme.
8
+ #
9
+ ########
10
+
11
+ #%% Imports and constants
12
+
13
+ import os
14
+ import json
15
+
16
+ from tqdm import tqdm
17
+ from collections import defaultdict
18
+
19
+ from md_visualization.visualization_utils import open_image
20
+
21
+
22
+ #%% Functions
23
+
24
def get_labelme_dict_for_image_from_coco_record(im,annotations,categories,info=None):
    """
    Build the labelme-format dict for a single COCO image record, without writing
    anything to disk.

    [im] is a COCO image dict; 'file_name', 'height', and 'width' are required, and
    'flags' is copied to the output if present.

    [annotations] is the list of COCO annotations that belong to this image; annotations
    without a 'bbox' field are ignored.

    [categories] is the standard COCO category list (dicts with 'id' and 'name').

    If [info] is not None, it is stored in the output as 'custom_info'.
    """

    labelme_record = {}

    if info is not None:
        labelme_record['custom_info'] = info

    labelme_record.update({
        'version':'5.3.0a0',
        'flags':{},
        'shapes':[],
        'imagePath':os.path.basename(im['file_name']),
        'imageHeight':im['height'],
        'imageWidth':im['width'],
        'imageData':None,
        # Keep the COCO categories around so the original IDs can be reconstructed later
        'coco_categories':categories
    })

    names_by_category_id = dict((cat['id'],cat['name']) for cat in categories)

    if 'flags' in im:
        labelme_record['flags'] = im['flags']

    for annotation in annotations:

        # Only annotations that carry a box become labelme shapes
        if 'bbox' in annotation:

            label = names_by_category_id[annotation['category_id']]

            # COCO boxes are [x_min, y_min, width_of_box, height_of_box] (absolute)
            #
            # labelme boxes are [[x0,y0],[x1,y1]] (absolute)
            box = annotation['bbox']
            left = box[0]
            top = box[1]
            right = box[0] + box[2]
            bottom = box[1] + box[3]

            labelme_record['shapes'].append({
                'label':label,
                'shape_type':'rectangle',
                'description':'',
                'group_id':None,
                'points':[[left,top],[right,bottom]]
            })

    # ...for each annotation

    return labelme_record

# ...def get_labelme_dict_for_image_from_coco_record()
83
+
84
+
85
def coco_to_labelme(coco_data,image_base,overwrite=False,bypass_image_size_check=False,verbose=False):
    """
    For all the images in [coco_data] (a dict or a filename), write a .json file in
    labelme format alongside the corresponding relative path within image_base.

    [coco_data] is a COCO-formatted dict, or the path to a COCO .json file.  When a dict
    is supplied, it is modified in place: 'width'/'height' are added to image records that
    lack them, and 'failure' is set on images that cannot be opened.

    [image_base] is the folder that the 'file_name' fields are relative to; each .json
    file is written next to its image, with the image extension replaced by '.json'.

    [overwrite] controls whether existing .json files are replaced (True) or left
    alone (False).

    [bypass_image_size_check] skips the pass that asserts that every image file exists and
    reads the image size when it is missing.  In that case every image record must already
    contain 'height' and 'width'.

    [verbose] enables per-image messages for images that are skipped.

    Images whose 'failure' or 'error' field is present and not None are skipped.
    """

    # Load COCO data if necessary
    if isinstance(coco_data,str):
        with open(coco_data,'r') as f:
            coco_data = json.load(f)
    assert isinstance(coco_data,dict)


    ## Read image sizes if necessary

    if bypass_image_size_check:

        print('Bypassing size check')

    else:

        # TODO: parallelize this loop

        print('Reading/validating image sizes...')

        # im = coco_data['images'][0]
        for im in tqdm(coco_data['images']):

            # Make sure this file exists
            im_full_path = os.path.join(image_base,im['file_name'])
            assert os.path.isfile(im_full_path), 'Image file {} does not exist'.format(im_full_path)

            # Load w/h information if necessary
            if 'height' not in im or 'width' not in im:

                try:
                    pil_im = open_image(im_full_path)
                    im['width'] = pil_im.width
                    im['height'] = pil_im.height
                except Exception:
                    print('Warning: cannot open image {}'.format(im_full_path))
                    # A 'failure' field that is present but None means "no failure" (the
                    # output loop below treats it that way), so it has to be replaced
                    # here too.  Otherwise this image would not be skipped, and would
                    # fail later on the missing size fields.
                    if im.get('failure') is None:
                        im['failure'] = 'Failure image access'

            # ...if we need to read w/h information

        # ...for each image

    # ...if we need to load image sizes


    ## Generate labelme files

    print('Generating .json files...')

    image_id_to_annotations = defaultdict(list)
    for ann in coco_data['annotations']:
        image_id_to_annotations[ann['image_id']].append(ann)

    n_json_files_written = 0
    n_json_files_error = 0
    n_json_files_exist = 0

    # Write output
    for im in tqdm(coco_data['images']):

        # Skip this image if it failed to load in whatever system generated this COCO file
        skip_image = False

        # Errors are represented differently depending on the source
        for error_string in ('failure','error'):
            if (error_string in im) and (im[error_string] is not None):
                if verbose:
                    print('Warning: skipping labelme file generation for failed image {}'.format(
                        im['file_name']))
                skip_image = True
                n_json_files_error += 1
                break
        if skip_image:
            continue

        im_full_path = os.path.join(image_base,im['file_name'])
        json_path = os.path.splitext(im_full_path)[0] + '.json'

        if (not overwrite) and (os.path.isfile(json_path)):
            if verbose:
                print('Skipping existing file {}'.format(json_path))
            n_json_files_exist += 1
            continue

        annotations_this_image = image_id_to_annotations[im['id']]
        output_dict = get_labelme_dict_for_image_from_coco_record(im,
                                                                  annotations_this_image,
                                                                  coco_data['categories'],
                                                                  info=None)

        with open(json_path,'w') as f:
            json.dump(output_dict,f,indent=1)

        # Only count files that were actually written
        n_json_files_written += 1

    # ...for each image

    print('\nWrote {} .json files (skipped {} for errors, {} because they exist)'.format(
        n_json_files_written,n_json_files_error,n_json_files_exist))

# ...def coco_to_labelme()
191
+
192
+
193
+ #%% Interactive driver
194
+
195
# Scratch cells for interactive use in an IDE; never executed on import or from the
# command line.
if False:

    pass

    #%% Configure options

    coco_file = \
        r'C:\\temp\\snapshot-exploration\\images\\training-images-good\\training-images-good_from_yolo.json'
    image_folder = os.path.dirname(coco_file)
    overwrite = True


    #%% Programmatic execution

    coco_to_labelme(coco_data=coco_file,
                    image_base=image_folder,
                    overwrite=overwrite)


    #%% Command-line execution

    # Build the equivalent command line and put it on the clipboard
    cmd = 'python coco_to_labelme.py "{}" "{}"'.format(coco_file,image_folder)
    if overwrite:
        cmd = cmd + ' --overwrite'

    print(cmd)
    import clipboard
    clipboard.copy(cmd)


    #%% Opening labelme

    # Build the command that opens the image folder in labelme
    cmd = 'python labelme {}'.format(image_folder)
    print(cmd)
    import clipboard
    clipboard.copy(cmd)
227
+
228
+
229
+ #%% Command-line driver
230
+
231
+ import sys,argparse
232
+
233
def main():
    """
    Command-line entry point: parses [coco_file], [image_base], and --overwrite, then
    runs coco_to_labelme().  Prints usage and exits if no arguments are supplied.
    """

    arg_parser = argparse.ArgumentParser(
        description='Convert a COCO database to labelme annotation format')

    arg_parser.add_argument('coco_file',
                            type=str,
                            help='Path to COCO data file (.json)')
    arg_parser.add_argument('image_base',
                            type=str,
                            help='Path to images (also the output folder)')
    arg_parser.add_argument('--overwrite',
                            action='store_true',
                            help='Overwrite existing labelme .json files')

    # With no arguments at all, show the full help text rather than an argparse error
    if not sys.argv[1:]:
        arg_parser.print_help()
        arg_parser.exit()

    options = arg_parser.parse_args()

    coco_to_labelme(coco_data=options.coco_file,
                    image_base=options.image_base,
                    overwrite=options.overwrite)


if __name__ == '__main__':
    main()
@@ -56,6 +56,10 @@ def write_yolo_dataset_file(yolo_dataset_file,
56
56
  class_lines = [s.strip() for s in class_lines]
57
57
  class_list = [s for s in class_lines if len(s) > 0]
58
58
 
59
+ if not (yolo_dataset_file.endswith('.yml') or yolo_dataset_file.endswith('.yaml')):
60
+ print('Warning: writing dataset file to a non-yml/yaml extension:\n{}'.format(
61
+ yolo_dataset_file))
62
+
59
63
  # Write dataset.yaml
60
64
  with open(yolo_dataset_file,'w') as f:
61
65
 
@@ -89,10 +93,12 @@ def coco_to_yolo(input_image_folder,output_folder,input_file,
89
93
  images_to_exclude=None,
90
94
  path_replacement_char='#',
91
95
  category_names_to_exclude=None,
92
- write_output=True):
96
+ category_names_to_include=None,
97
+ write_output=True,
98
+ flatten_paths=True):
93
99
  """
94
- Convert a COCO-formatted dataset to a YOLO-formatted dataset, flattening the dataset
95
- (to a single folder) in the process.
100
+ Convert a COCO-formatted dataset to a YOLO-formatted dataset, optionally flattening the
101
+ dataset to a single folder in the process.
96
102
 
97
103
  If the input and output folders are the same, writes .txt files to the input folder,
98
104
  and neither moves nor modifies images.
@@ -130,6 +136,9 @@ def coco_to_yolo(input_image_folder,output_folder,input_file,
130
136
 
131
137
  ## Validate input
132
138
 
139
+ if category_names_to_include is not None and category_names_to_exclude is not None:
140
+ raise ValueError('category_names_to_include and category_names_to_exclude are mutually exclusive')
141
+
133
142
  if output_folder is None:
134
143
  output_folder = input_image_folder
135
144
 
@@ -138,12 +147,16 @@ def coco_to_yolo(input_image_folder,output_folder,input_file,
138
147
 
139
148
  if category_names_to_exclude is None:
140
149
  category_names_to_exclude = {}
141
-
150
+
142
151
  assert os.path.isdir(input_image_folder)
143
152
  assert os.path.isfile(input_file)
144
153
  os.makedirs(output_folder,exist_ok=True)
145
154
 
146
-
155
+ if (output_folder == input_image_folder) and (overwrite_images) and \
156
+ (not create_image_and_label_folders) and (not flatten_paths):
157
+ print('Warning: output folder and input folder are the same, disabling overwrite_images')
158
+ overwrite_images = False
159
+
147
160
  ## Read input data
148
161
 
149
162
  with open(input_file,'r') as f:
@@ -180,11 +193,14 @@ def coco_to_yolo(input_image_folder,output_folder,input_file,
180
193
  coco_id_to_name = {}
181
194
  yolo_id_to_name = {}
182
195
  coco_category_ids_to_exclude = set()
183
- category_exclusion_warnings_printed = set()
184
196
 
185
197
  for category in data['categories']:
186
198
  coco_id_to_name[category['id']] = category['name']
187
- if (category['name'] in category_names_to_exclude):
199
+ if (category_names_to_include is not None) and \
200
+ (category['name'] not in category_names_to_include):
201
+ coco_category_ids_to_exclude.add(category['id'])
202
+ continue
203
+ elif (category['name'] in category_names_to_exclude):
188
204
  coco_category_ids_to_exclude.add(category['id'])
189
205
  continue
190
206
  assert category['id'] not in coco_id_to_yolo_id
@@ -228,9 +244,13 @@ def coco_to_yolo(input_image_folder,output_folder,input_file,
228
244
  tokens = os.path.splitext(im['file_name'])
229
245
  if tokens[1].lower() not in typical_image_extensions:
230
246
  print('Warning: unusual image file name {}'.format(im['file_name']))
231
-
232
- image_name = tokens[0].replace('\\','/').replace('/',path_replacement_char) + \
233
- '_' + str(i_image).zfill(6)
247
+
248
+ if flatten_paths:
249
+ image_name = tokens[0].replace('\\','/').replace('/',path_replacement_char) + \
250
+ '_' + str(i_image).zfill(6)
251
+ else:
252
+ image_name = tokens[0]
253
+
234
254
  assert image_name not in image_names, 'Image name collision for {}'.format(image_name)
235
255
  image_names.add(image_name)
236
256
 
@@ -293,12 +313,6 @@ def coco_to_yolo(input_image_folder,output_folder,input_file,
293
313
  # This category isn't in our category list. This typically corresponds to whole sets
294
314
  # of images that were excluded from the YOLO set.
295
315
  if ann['category_id'] in coco_category_ids_to_exclude:
296
- category_name = coco_id_to_name[ann['category_id']]
297
- if category_name not in category_exclusion_warnings_printed:
298
- category_exclusion_warnings_printed.add(category_name)
299
- print('Warning: ignoring category {} in image {}'.format(
300
- category_name,image_id),end='')
301
- print('...are you sure you didn\'t mean to exclude this image?')
302
316
  continue
303
317
 
304
318
  yolo_category_id = coco_id_to_yolo_id[ann['category_id']]
@@ -407,30 +421,38 @@ def coco_to_yolo(input_image_folder,output_folder,input_file,
407
421
  with open(image_id_to_output_image_json_file,'w') as f:
408
422
  json.dump(image_id_to_output_image_name,f,indent=1)
409
423
 
410
- if (write_output):
411
-
424
+
425
+ if (output_folder == input_image_folder) and (not create_image_and_label_folders):
426
+ print('Creating annotation files (not copying images, input and output folder are the same)')
427
+ else:
412
428
  print('Copying images and creating annotation files')
429
+
430
+ if create_image_and_label_folders:
431
+ dest_image_folder = os.path.join(output_folder,'images')
432
+ dest_txt_folder = os.path.join(output_folder,'labels')
433
+ else:
434
+ dest_image_folder = output_folder
435
+ dest_txt_folder = output_folder
436
+
437
+ source_image_to_dest_image = {}
413
438
 
414
- if create_image_and_label_folders:
415
- dest_image_folder = os.path.join(output_folder,'images')
416
- dest_txt_folder = os.path.join(output_folder,'labels')
417
- else:
418
- dest_image_folder = output_folder
419
- dest_txt_folder = output_folder
420
-
421
- # TODO: parallelize this loop
422
- #
423
- # output_info = images_to_copy[0]
424
- for output_info in tqdm(images_to_copy):
425
-
426
- source_image = output_info['source_image']
427
- dest_image_relative = output_info['dest_image_relative']
428
- dest_txt_relative = output_info['dest_txt_relative']
429
-
430
- dest_image = os.path.join(dest_image_folder,dest_image_relative)
431
- os.makedirs(os.path.dirname(dest_image),exist_ok=True)
439
+ # TODO: parallelize this loop
440
+ #
441
+ # output_info = images_to_copy[0]
442
+ for output_info in tqdm(images_to_copy):
443
+
444
+ source_image = output_info['source_image']
445
+ dest_image_relative = output_info['dest_image_relative']
446
+ dest_txt_relative = output_info['dest_txt_relative']
447
+
448
+ dest_image = os.path.join(dest_image_folder,dest_image_relative)
449
+ dest_txt = os.path.join(dest_txt_folder,dest_txt_relative)
450
+
451
+ source_image_to_dest_image[source_image] = dest_image
452
+
453
+ if write_output:
432
454
 
433
- dest_txt = os.path.join(dest_txt_folder,dest_txt_relative)
455
+ os.makedirs(os.path.dirname(dest_image),exist_ok=True)
434
456
  os.makedirs(os.path.dirname(dest_txt),exist_ok=True)
435
457
 
436
458
  if not create_image_and_label_folders:
@@ -438,7 +460,7 @@ def coco_to_yolo(input_image_folder,output_folder,input_file,
438
460
 
439
461
  if (not os.path.isfile(dest_image)) or (overwrite_images):
440
462
  shutil.copyfile(source_image,dest_image)
441
-
463
+
442
464
  bboxes = output_info['bboxes']
443
465
 
444
466
  # Only write an annotation file if there are bounding boxes. Images with
@@ -446,9 +468,9 @@ def coco_to_yolo(input_image_folder,output_folder,input_file,
446
468
  #
447
469
  # https://github.com/ultralytics/yolov5/issues/3218
448
470
  #
449
- # I think this is also true for images with empty annotation files, but
450
- # I'm using the convention suggested on that issue, i.e. hard negatives
451
- # are expressed as images without .txt files.
471
+ # I think this is also true for images with empty .txt files, but
472
+ # I'm using the convention suggested on that issue, i.e. hard
473
+ # negatives are expressed as images without .txt files.
452
474
  if len(bboxes) > 0:
453
475
 
454
476
  with open(dest_txt,'w') as f:
@@ -458,15 +480,17 @@ def coco_to_yolo(input_image_folder,output_folder,input_file,
458
480
  assert len(bbox) == 5
459
481
  s = '{} {} {} {} {}'.format(bbox[0],bbox[1],bbox[2],bbox[3],bbox[4])
460
482
  f.write(s + '\n')
461
-
462
- # ...for each image
483
+
484
+ # ...if we're actually writing output
463
485
 
464
- # ...if we're actually writing output
465
-
466
- return_info = {}
467
- return_info['class_list_filename'] = class_list_filename
486
+ # ...for each image
487
+
488
+ coco_to_yolo_info = {}
489
+ coco_to_yolo_info['class_list_filename'] = class_list_filename
490
+ coco_to_yolo_info['source_image_to_dest_image'] = source_image_to_dest_image
491
+ coco_to_yolo_info['coco_id_to_yolo_id'] = coco_id_to_yolo_id
468
492
 
469
- return return_info
493
+ return coco_to_yolo_info
470
494
 
471
495
  # ...def coco_to_yolo(...)
472
496