megadetector 5.0.7-py3-none-any.whl → 5.0.8-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of megadetector might be problematic.

Files changed (48)
  1. api/batch_processing/data_preparation/manage_local_batch.py +28 -14
  2. api/batch_processing/postprocessing/combine_api_outputs.py +2 -2
  3. api/batch_processing/postprocessing/compare_batch_results.py +1 -1
  4. api/batch_processing/postprocessing/convert_output_format.py +24 -6
  5. api/batch_processing/postprocessing/load_api_results.py +1 -3
  6. api/batch_processing/postprocessing/md_to_labelme.py +118 -51
  7. api/batch_processing/postprocessing/merge_detections.py +30 -5
  8. api/batch_processing/postprocessing/postprocess_batch_results.py +24 -12
  9. api/batch_processing/postprocessing/remap_detection_categories.py +163 -0
  10. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +15 -12
  11. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +2 -2
  12. data_management/cct_json_utils.py +7 -2
  13. data_management/coco_to_labelme.py +263 -0
  14. data_management/coco_to_yolo.py +7 -4
  15. data_management/databases/integrity_check_json_db.py +68 -59
  16. data_management/databases/subset_json_db.py +1 -1
  17. data_management/get_image_sizes.py +44 -26
  18. data_management/importers/animl_results_to_md_results.py +1 -3
  19. data_management/importers/noaa_seals_2019.py +1 -1
  20. data_management/labelme_to_coco.py +252 -143
  21. data_management/labelme_to_yolo.py +95 -52
  22. data_management/lila/create_lila_blank_set.py +106 -23
  23. data_management/lila/download_lila_subset.py +133 -65
  24. data_management/lila/generate_lila_per_image_labels.py +1 -1
  25. data_management/lila/lila_common.py +8 -38
  26. data_management/read_exif.py +65 -16
  27. data_management/remap_coco_categories.py +84 -0
  28. data_management/resize_coco_dataset.py +3 -22
  29. data_management/wi_download_csv_to_coco.py +239 -0
  30. data_management/yolo_to_coco.py +283 -83
  31. detection/run_detector_batch.py +12 -3
  32. detection/run_inference_with_yolov5_val.py +10 -3
  33. detection/run_tiled_inference.py +2 -2
  34. detection/tf_detector.py +2 -1
  35. detection/video_utils.py +1 -1
  36. md_utils/ct_utils.py +22 -3
  37. md_utils/md_tests.py +11 -2
  38. md_utils/path_utils.py +206 -32
  39. md_utils/url_utils.py +66 -1
  40. md_utils/write_html_image_list.py +12 -3
  41. md_visualization/visualization_utils.py +363 -72
  42. md_visualization/visualize_db.py +33 -10
  43. {megadetector-5.0.7.dist-info → megadetector-5.0.8.dist-info}/METADATA +10 -12
  44. {megadetector-5.0.7.dist-info → megadetector-5.0.8.dist-info}/RECORD +47 -44
  45. {megadetector-5.0.7.dist-info → megadetector-5.0.8.dist-info}/WHEEL +1 -1
  46. md_visualization/visualize_megadb.py +0 -183
  47. {megadetector-5.0.7.dist-info → megadetector-5.0.8.dist-info}/LICENSE +0 -0
  48. {megadetector-5.0.7.dist-info → megadetector-5.0.8.dist-info}/top_level.txt +0 -0
data_management/coco_to_yolo.py

@@ -56,6 +56,10 @@ def write_yolo_dataset_file(yolo_dataset_file,
     class_lines = [s.strip() for s in class_lines]
     class_list = [s for s in class_lines if len(s) > 0]
 
+    if not (yolo_dataset_file.endswith('.yml') or yolo_dataset_file.endswith('.yaml')):
+        print('Warning: writing dataset file to a non-yml/yaml extension:\n{}'.format(
+            yolo_dataset_file))
+
     # Write dataset.yaml
     with open(yolo_dataset_file,'w') as f:
 
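The new check is purely advisory: the dataset file is still written, but a warning is printed for unconventional extensions. A standalone sketch of the behavior (hypothetical filenames, not the library function itself):

    # Standalone illustration of the advisory extension check added above
    for path in ['dataset.yaml', 'dataset.yml', 'dataset.txt']:
        if not (path.endswith('.yml') or path.endswith('.yaml')):
            print('Warning: writing dataset file to a non-yml/yaml extension:\n{}'.format(path))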
@@ -189,7 +193,6 @@ def coco_to_yolo(input_image_folder,output_folder,input_file,
     coco_id_to_name = {}
     yolo_id_to_name = {}
     coco_category_ids_to_exclude = set()
-    category_exclusion_warnings_printed = set()
 
     for category in data['categories']:
         coco_id_to_name[category['id']] = category['name']
@@ -465,9 +468,9 @@ def coco_to_yolo(input_image_folder,output_folder,input_file,
     #
     # https://github.com/ultralytics/yolov5/issues/3218
     #
-    # I think this is also true for images with empty annotation files, but
-    # I'm using the convention suggested on that issue, i.e. hard negatives
-    # are expressed as images without .txt files.
+    # I think this is also true for images with empty .txt files, but
+    # I'm using the convention suggested on that issue, i.e. hard
+    # negatives are expressed as images without .txt files.
     if len(bboxes) > 0:
 
         with open(dest_txt,'w') as f:
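To make the convention in that comment concrete: hard negatives are expressed as images with no .txt label file at all, not as empty .txt files. A minimal hypothetical sketch (not the package's own helper):

    # Hypothetical helper illustrating the convention from
    # https://github.com/ultralytics/yolov5/issues/3218
    def write_yolo_label_file(dest_txt, bboxes):
        # bboxes: list of (class_id, x_center, y_center, w, h), normalized to [0,1]
        if len(bboxes) == 0:
            # Hard negative: write no .txt file at all, rather than an empty one
            return
        with open(dest_txt, 'w') as f:
            for class_id, x, y, w, h in bboxes:
                f.write('{} {} {} {} {}\n'.format(class_id, x, y, w, h))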
data_management/databases/integrity_check_json_db.py

@@ -24,9 +24,9 @@ import sys
 
 from multiprocessing.pool import ThreadPool
 from operator import itemgetter
-from PIL import Image
 from tqdm import tqdm
 
+from md_visualization.visualization_utils import open_image
 from md_utils import ct_utils
 
 
@@ -41,6 +41,8 @@ class IntegrityCheckOptions:
     bRequireLocation = True
     iMaxNumImages = -1
     nThreads = 10
+    verbose = True
+
 
 # This is used in a medium-hacky way to share modified options across threads
 defaultOptions = IntegrityCheckOptions()
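The new verbose flag defaults to True, preserving 5.0.7's console output; setting it to False silences the progress and statistics printouts gated in the hunks below. A hedged usage sketch, assuming the module layout in this wheel ('train.json' is a placeholder path):

    from data_management.databases.integrity_check_json_db import (
        IntegrityCheckOptions, integrity_check_json_db)

    options = IntegrityCheckOptions()
    options.verbose = False          # new in 5.0.8: run quietly
    options.bCheckImageSizes = True  # also forces existence checks (see below)
    result = integrity_check_json_db('train.json', options)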
@@ -65,7 +67,9 @@ def check_image_existence_and_size(image,options=None):
         print('Missing image size in {}'.format(filePath))
         return False
 
-    width, height = Image.open(filePath).size
+    # width, height = Image.open(filePath).size
+    pil_im = open_image(filePath)
+    width,height = pil_im.size
     if (not (width == image['width'] and height == image['height'])):
         print('Size mismatch for image {}: {} (reported {},{}, actual {},{})'.format(
             image['id'], filePath, image['width'], image['height'], width, height))
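open_image() is the package's shared loader from md_visualization.visualization_utils; the diff doesn't state the motivation, but the call is a drop-in replacement for the bare PIL one. Outside this module the swap looks like this sketch ('/path/to/image.jpg' is a placeholder):

    from md_visualization.visualization_utils import open_image

    pil_im = open_image('/path/to/image.jpg')
    width, height = pil_im.size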
@@ -86,8 +90,9 @@ def integrity_check_json_db(jsonFile, options=None):
 
     if options.bCheckImageSizes:
         options.bCheckImageExistence = True
-
-    print(options.__dict__)
+
+    if options.verbose:
+        print(options.__dict__)
 
     if options.baseDir is None:
         options.baseDir = ''
@@ -105,8 +110,9 @@ def integrity_check_json_db(jsonFile, options=None):
 
     assert os.path.isfile(jsonFile), '.json file {} does not exist'.format(jsonFile)
 
-    print('Reading .json {} with base dir [{}]...'.format(
-        jsonFile,baseDir))
+    if options.verbose:
+        print('Reading .json {} with base dir [{}]...'.format(
+            jsonFile,baseDir))
 
     with open(jsonFile,'r') as f:
         data = json.load(f)
@@ -133,7 +139,8 @@ def integrity_check_json_db(jsonFile, options=None):
     catNameToCat = {}
     imageLocationSet = set()
 
-    print('Checking categories...')
+    if options.verbose:
+        print('Checking categories...')
 
     for cat in tqdm(categories):
 
@@ -157,11 +164,13 @@ def integrity_check_json_db(jsonFile, options=None):
 
     # ...for each category
 
-    print('\nChecking images...')
+    if options.verbose:
+        print('\nChecking images...')
 
     if options.iMaxNumImages > 0 and len(images) > options.iMaxNumImages:
 
-        print('Trimming image list to {}'.format(options.iMaxNumImages))
+        if options.verbose:
+            print('Trimming image list to {}'.format(options.iMaxNumImages))
         images = images[0:options.iMaxNumImages]
 
     imagePathsInJson = set()
@@ -217,7 +226,8 @@ def integrity_check_json_db(jsonFile, options=None):
     # Are we checking for unused images?
     if (len(baseDir) > 0) and options.bFindUnusedImages:
 
-        print('\nEnumerating images...')
+        if options.verbose:
+            print('\nEnumerating images...')
 
         # Recursively enumerate images
         imagePaths = []
@@ -244,8 +254,9 @@ def integrity_check_json_db(jsonFile, options=None):
 
     if len(baseDir) == 0:
         print('Warning: checking image sizes without a base directory, assuming "."')
-
-    print('Checking image existence and/or image sizes...')
+
+    if options.verbose:
+        print('Checking image existence and/or image sizes...')
 
     if options.nThreads is not None and options.nThreads > 1:
         pool = ThreadPool(options.nThreads)
@@ -265,9 +276,9 @@ def integrity_check_json_db(jsonFile, options=None):
 
     # ...for each image
 
-    print('{} validation errors (of {})'.format(len(validationErrors),len(images)))
-
-    print('Checking annotations...')
+    if options.verbose:
+        print('{} validation errors (of {})'.format(len(validationErrors),len(images)))
+        print('Checking annotations...')
 
     nBoxes = 0
 
@@ -302,58 +313,56 @@ def integrity_check_json_db(jsonFile, options=None):
             catIdToCat[ann['category_id']]['_count'] +=1
 
     # ...for each annotation
-
-
-    ##%% Print statistics
 
-    # Find un-annotated images and multi-annotation images
-    nUnannotated = 0
-    nMultiAnnotated = 0
+    sortedCategories = sorted(categories, key=itemgetter('_count'), reverse=True)
 
-    for image in images:
-        if image['_count'] == 0:
-            nUnannotated += 1
-        elif image['_count'] > 1:
-            nMultiAnnotated += 1
-
-    print('Found {} unannotated images, {} images with multiple annotations'.format(
-        nUnannotated,nMultiAnnotated))
 
-    if (len(baseDir) > 0) and options.bFindUnusedImages:
-        print('Found {} unused image files'.format(len(unusedFiles)))
-
-    nUnusedCategories = 0
+    ##%% Print statistics
 
-    # Find unused categories
-    for cat in categories:
-        if cat['_count'] == 0:
-            print('Unused category: {}'.format(cat['name']))
-            nUnusedCategories += 1
+    if options.verbose:
 
-    print('Found {} unused categories'.format(nUnusedCategories))
+        # Find un-annotated images and multi-annotation images
+        nUnannotated = 0
+        nMultiAnnotated = 0
+
+        for image in images:
+            if image['_count'] == 0:
+                nUnannotated += 1
+            elif image['_count'] > 1:
+                nMultiAnnotated += 1
+
+        print('Found {} unannotated images, {} images with multiple annotations'.format(
+            nUnannotated,nMultiAnnotated))
+
+        if (len(baseDir) > 0) and options.bFindUnusedImages:
+            print('Found {} unused image files'.format(len(unusedFiles)))
 
-    sequenceString = 'no sequence info'
-    if len(sequences) > 0:
-        sequenceString = '{} sequences'.format(len(sequences))
+        nUnusedCategories = 0
 
-    print('\nDB contains {} images, {} annotations, {} bboxes, {} categories, {}\n'.format(
-        len(images),len(annotations),nBoxes,len(categories),sequenceString))
-
-    if len(imageLocationSet) > 0:
-        print('DB contains images from {} locations\n'.format(len(imageLocationSet)))
-
-    # Prints a list of categories sorted by count
-    #
-    # https://stackoverflow.com/questions/72899/how-do-i-sort-a-list-of-dictionaries-by-a-value-of-the-dictionary
-
-    sortedCategories = sorted(categories, key=itemgetter('_count'), reverse=True)
-
-    print('Categories and annotation (not image) counts:\n')
-
-    for cat in sortedCategories:
-        print('{:6} {}'.format(cat['_count'],cat['name']))
+        # Find unused categories
+        for cat in categories:
+            if cat['_count'] == 0:
+                print('Unused category: {}'.format(cat['name']))
+                nUnusedCategories += 1
+
+        print('Found {} unused categories'.format(nUnusedCategories))
+
+        sequenceString = 'no sequence info'
+        if len(sequences) > 0:
+            sequenceString = '{} sequences'.format(len(sequences))
+
+        print('\nDB contains {} images, {} annotations, {} bboxes, {} categories, {}\n'.format(
+            len(images),len(annotations),nBoxes,len(categories),sequenceString))
 
-    print('')
+        if len(imageLocationSet) > 0:
+            print('DB contains images from {} locations\n'.format(len(imageLocationSet)))
+
+        print('Categories and annotation (not image) counts:\n')
+
+        for cat in sortedCategories:
+            print('{:6} {}'.format(cat['_count'],cat['name']))
+
+        print('')
 
     errorInfo = {}
     errorInfo['unusedFiles'] = unusedFiles
data_management/databases/subset_json_db.py

@@ -65,7 +65,7 @@ def subset_json_db(input_json, query, output_json=None, ignore_case=False):
     # Write the output file if requested
     if output_json is not None:
         print('Writing output .json...')
-        json.dump(output_data,open(output_json,'w'),indent=4)
+        json.dump(output_data,open(output_json,'w'),indent=1)
 
     return output_data
 
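The indent=4 to indent=1 change doesn't affect content, only whitespace, but for large subset files each nesting level now costs one leading space per line instead of four. A quick illustration with the standard library:

    import json

    data = {'images': [{'file': 'a.jpg'}, {'file': 'b.jpg'}]}
    print(len(json.dumps(data, indent=4)))  # more whitespace...
    print(len(json.dumps(data, indent=1)))  # ...same content, smaller output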
data_management/get_image_sizes.py

@@ -15,6 +15,8 @@ import os
 from PIL import Image
 import sys
 
+from md_utils.path_utils import find_images
+
 from multiprocessing.pool import ThreadPool
 from multiprocessing.pool import Pool
 from functools import partial
@@ -27,7 +29,11 @@ use_threads = False
 
 #%% Processing functions
 
-def process_image(image_path,image_prefix=None):
+def _get_image_size(image_path,image_prefix=None):
+    """
+    Support function to get the size of a single image. Returns a (path,w,h) tuple.
+    w and h will be -1 if the image fails to load.
+    """
 
     if image_prefix is not None:
         full_path = os.path.join(image_prefix,image_path)
@@ -49,43 +55,56 @@ def process_image(image_path,image_prefix=None):
         return (image_path,-1,-1)
 
 
-def process_images(filenames,image_prefix=None,n_threads=default_n_threads):
+def get_image_sizes(filenames,image_prefix=None,output_file=None,
+                    n_workers=default_n_threads,use_threads=True,
+                    recursive=True):
+    """
+    Get the width and height of all images in [filenames], which can be:
+
+    * A .json-formatted file
+    * A folder
+    * A list of files
+
+    ...returning a list of (path,w,h) tuples, and optionally writing the results to [output_file].
+    """
+
+    if output_file is not None:
+        assert os.path.isdir(os.path.dirname(output_file)), \
+            'Illegal output file {}, parent folder does not exist'.format(output_file)
+
+    if isinstance(filenames,str) and os.path.isfile(filenames):
+        with open(filenames,'r') as f:
+            filenames = json.load(f)
+        filenames = [s.strip() for s in filenames]
+    elif isinstance(filenames,str) and os.path.isdir(filenames):
+        filenames = find_images(filenames,recursive=recursive,
+                                return_relative_paths=False,convert_slashes=True)
+    else:
+        assert isinstance(filenames,list)
 
-    if n_threads <= 1:
+    if n_workers <= 1:
 
         all_results = []
         for i_file,fn in tqdm(enumerate(filenames),total=len(filenames)):
-            all_results.append(process_image(fn,image_prefix=image_prefix))
+            all_results.append(_get_image_size(fn,image_prefix=image_prefix))
 
     else:
 
-        print('Creating a pool with {} threads'.format(n_threads))
+        print('Creating a pool with {} workers'.format(n_workers))
         if use_threads:
-            pool = ThreadPool(n_threads)
+            pool = ThreadPool(n_workers)
         else:
-            pool = Pool(n_threads)
+            pool = Pool(n_workers)
         # all_results = list(tqdm(pool.imap(process_image, filenames), total=len(filenames)))
         all_results = list(tqdm(pool.imap(
-            partial(process_image,image_prefix=image_prefix), filenames), total=len(filenames)))
-
-    return all_results
-
-
-def process_list_file(input_file,output_file=None,image_prefix=None,n_threads=default_n_threads):
-
-    assert os.path.isdir(os.path.dirname(output_file))
-    assert os.path.isfile(input_file)
-
-    with open(input_file,'r') as f:
-        filenames = json.load(f)
-    filenames = [s.strip() for s in filenames]
-
-    all_results = process_images(filenames,image_prefix=image_prefix,n_threads=n_threads)
+            partial(_get_image_size,image_prefix=image_prefix), filenames), total=len(filenames)))
 
     if output_file is not None:
         with open(output_file,'w') as f:
             json.dump(all_results,f,indent=1)
-
+
+    return all_results
+
 
 #%% Interactive driver
 
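get_image_sizes() folds the old process_images()/process_list_file() pair into one entry point that accepts a folder, a .json list file, or a Python list. A hedged usage sketch ('/data/camera_traps' and '/data/sizes.json' are placeholder paths):

    from data_management.get_image_sizes import get_image_sizes

    # Folder input; images are enumerated recursively by default
    sizes = get_image_sizes('/data/camera_traps',
                            output_file='/data/sizes.json',
                            n_workers=8, use_threads=True)

    # Each element is a (path,w,h) tuple; w and h are -1 if an image failed to load
    for path, w, h in sizes[:5]:
        print(path, w, h)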
@@ -116,8 +135,7 @@ if False:
 
     #%%
 
-    # process_list_file(image_list_file,image_size_file,image_prefix=base_dir)
-    process_list_file(relative_image_list_file,image_size_file,image_prefix=base_dir,n_threads=4)
+    get_image_sizes(relative_image_list_file,image_size_file,image_prefix=base_dir,n_threads=4)
 
 
 #%% Command-line driver
@@ -136,7 +154,7 @@ def main():
 
     args = parser.parse_args()
 
-    process_list_file(args.input_file,args.output_file,args.image_prefix,args.n_threads)
+    _ = get_image_sizes(args.input_file,args.output_file,args.image_prefix,args.n_threads)
 
 
 if __name__ == '__main__':
data_management/importers/animl_results_to_md_results.py

@@ -39,8 +39,6 @@ def animl_results_to_md_results(input_file,output_file=None):
     If [output_file] is None, '.json' will be appended to the input file.
     """
 
-    #%%
-
     if output_file is None:
         output_file = input_file + '.json'
 
@@ -111,7 +109,7 @@ def animl_results_to_md_results(input_file,output_file=None):
     with open(output_file,'w') as f:
         json.dump(results,f,indent=1)
 
-# ...zamba_results_to_md_results(...)
+# ...animl_results_to_md_results(...)
 
 
 #%% Interactive driver
data_management/importers/noaa_seals_2019.py

@@ -165,7 +165,7 @@ for b in ir_boxes:
         use_normalized_coordinates=False,
         thickness=3)
 
-visualization_utils.show_images_in_a_row([img_rgb,img_ir])
+# visualization_utils.show_images_in_a_row([img_rgb,img_ir])
 
 
 #%% Save images