megadetector 5.0.6__py3-none-any.whl → 5.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic; see the registry's advisory page for more details.

Files changed (75)
  1. api/batch_processing/data_preparation/manage_local_batch.py +297 -202
  2. api/batch_processing/data_preparation/manage_video_batch.py +7 -2
  3. api/batch_processing/postprocessing/add_max_conf.py +1 -0
  4. api/batch_processing/postprocessing/combine_api_outputs.py +2 -2
  5. api/batch_processing/postprocessing/compare_batch_results.py +111 -61
  6. api/batch_processing/postprocessing/convert_output_format.py +24 -6
  7. api/batch_processing/postprocessing/load_api_results.py +56 -72
  8. api/batch_processing/postprocessing/md_to_labelme.py +119 -51
  9. api/batch_processing/postprocessing/merge_detections.py +30 -5
  10. api/batch_processing/postprocessing/postprocess_batch_results.py +175 -55
  11. api/batch_processing/postprocessing/remap_detection_categories.py +163 -0
  12. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +628 -0
  13. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +71 -23
  14. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +1 -1
  15. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +224 -76
  16. api/batch_processing/postprocessing/subset_json_detector_output.py +132 -5
  17. api/batch_processing/postprocessing/top_folders_to_bottom.py +1 -1
  18. classification/prepare_classification_script.py +191 -191
  19. data_management/cct_json_utils.py +7 -2
  20. data_management/coco_to_labelme.py +263 -0
  21. data_management/coco_to_yolo.py +72 -48
  22. data_management/databases/integrity_check_json_db.py +75 -64
  23. data_management/databases/subset_json_db.py +1 -1
  24. data_management/generate_crops_from_cct.py +1 -1
  25. data_management/get_image_sizes.py +44 -26
  26. data_management/importers/animl_results_to_md_results.py +3 -5
  27. data_management/importers/noaa_seals_2019.py +2 -2
  28. data_management/importers/zamba_results_to_md_results.py +2 -2
  29. data_management/labelme_to_coco.py +264 -127
  30. data_management/labelme_to_yolo.py +96 -53
  31. data_management/lila/create_lila_blank_set.py +557 -0
  32. data_management/lila/create_lila_test_set.py +2 -1
  33. data_management/lila/create_links_to_md_results_files.py +1 -1
  34. data_management/lila/download_lila_subset.py +138 -45
  35. data_management/lila/generate_lila_per_image_labels.py +23 -14
  36. data_management/lila/get_lila_annotation_counts.py +16 -10
  37. data_management/lila/lila_common.py +15 -42
  38. data_management/lila/test_lila_metadata_urls.py +116 -0
  39. data_management/read_exif.py +65 -16
  40. data_management/remap_coco_categories.py +84 -0
  41. data_management/resize_coco_dataset.py +14 -31
  42. data_management/wi_download_csv_to_coco.py +239 -0
  43. data_management/yolo_output_to_md_output.py +40 -13
  44. data_management/yolo_to_coco.py +313 -100
  45. detection/process_video.py +36 -14
  46. detection/pytorch_detector.py +1 -1
  47. detection/run_detector.py +73 -18
  48. detection/run_detector_batch.py +116 -27
  49. detection/run_inference_with_yolov5_val.py +135 -27
  50. detection/run_tiled_inference.py +153 -43
  51. detection/tf_detector.py +2 -1
  52. detection/video_utils.py +4 -2
  53. md_utils/ct_utils.py +101 -6
  54. md_utils/md_tests.py +264 -17
  55. md_utils/path_utils.py +326 -47
  56. md_utils/process_utils.py +26 -7
  57. md_utils/split_locations_into_train_val.py +215 -0
  58. md_utils/string_utils.py +10 -0
  59. md_utils/url_utils.py +66 -3
  60. md_utils/write_html_image_list.py +12 -2
  61. md_visualization/visualization_utils.py +380 -74
  62. md_visualization/visualize_db.py +41 -10
  63. md_visualization/visualize_detector_output.py +185 -104
  64. {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/METADATA +11 -13
  65. {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/RECORD +74 -67
  66. {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/WHEEL +1 -1
  67. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +1 -1
  68. taxonomy_mapping/map_new_lila_datasets.py +43 -39
  69. taxonomy_mapping/prepare_lila_taxonomy_release.py +5 -2
  70. taxonomy_mapping/preview_lila_taxonomy.py +27 -27
  71. taxonomy_mapping/species_lookup.py +33 -13
  72. taxonomy_mapping/taxonomy_csv_checker.py +7 -5
  73. md_visualization/visualize_megadb.py +0 -183
  74. {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/LICENSE +0 -0
  75. {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/top_level.txt +0 -0
@@ -2,7 +2,7 @@
2
2
  #
3
3
  # labelme_to_coco.py
4
4
  #
5
- # Converts a folder of labelme-formatted .json files to COCO.
5
+ # Converts a folder of labelme-formatted .json files to COCO format.
6
6
  #
7
7
  ########
8
8
 
@@ -15,21 +15,201 @@ import uuid
15
15
  from md_utils import path_utils
16
16
  from md_visualization.visualization_utils import open_image
17
17
 
18
+ from multiprocessing.pool import Pool, ThreadPool
19
+ from functools import partial
20
+
18
21
  from tqdm import tqdm
19
22
 
20
23
 
21
- #%% Functions
24
+ #%% Support functions
25
+
26
+ def add_category(category_name,category_name_to_id,candidate_category_id=0):
27
+ """
28
+ Add the category [category_name] to the dict [category_name_to_id], by default
29
+ using the next available integer index.
30
+ """
31
+
32
+ if category_name in category_name_to_id:
33
+ return category_name_to_id[category_name]
34
+ while candidate_category_id in category_name_to_id.values():
35
+ candidate_category_id += 1
36
+ category_name_to_id[category_name] = candidate_category_id
37
+ return candidate_category_id
38
+
39
+
40
+ def _process_labelme_file(image_fn_relative,input_folder,use_folders_as_labels,
41
+ no_json_handling,validate_image_sizes,
42
+ category_name_to_id,allow_new_categories=True):
43
+ """
44
+ Internal function for processing each image; this support function facilitates parallelization.
45
+ """
46
+
47
+ result = {}
48
+ result['im'] = None
49
+ result['annotations_this_image'] = None
50
+ result['status'] = None
51
+
52
+ image_fn_abs = os.path.join(input_folder,image_fn_relative)
53
+ json_fn_abs = os.path.splitext(image_fn_abs)[0] + '.json'
54
+
55
+ im = {}
56
+ im['id'] = image_fn_relative
57
+ im['file_name'] = image_fn_relative
58
+
59
+ # If there's no .json file for this image...
60
+ if not os.path.isfile(json_fn_abs):
61
+
62
+ # Either skip it...
63
+ if no_json_handling == 'skip':
64
+ print('Skipping image {} (no .json file)'.format(image_fn_relative))
65
+ result['status'] = 'skipped (no .json file)'
66
+ return result
67
+
68
+ # ...or error
69
+ elif no_json_handling == 'error':
70
+ raise ValueError('Image file {} has no corresponding .json file'.format(
71
+ image_fn_relative))
72
+
73
+ # ...or treat it as empty.
74
+ elif no_json_handling == 'empty':
75
+ try:
76
+ pil_im = open_image(image_fn_abs)
77
+ except Exception:
78
+ print('Warning: error opening image {}, skipping'.format(image_fn_abs))
79
+ result['status'] = 'image load error'
80
+ return result
81
+ im['width'] = pil_im.width
82
+ im['height'] = pil_im.height
83
+
84
+ # Just in case we need to differentiate between "no .json file" and "a .json file with no annotations"
85
+ im['no_labelme_json'] = True
86
+ shapes = []
87
+ else:
88
+ raise ValueError('Unrecognized specifier {} for handling images with no .json files'.format(
89
+ no_json_handling))
90
+
91
+ # If we found a .json file for this image...
92
+ else:
93
+
94
+ # Read the .json file
95
+ with open(json_fn_abs,'r') as f:
96
+ labelme_data = json.load(f)
97
+ im['width'] = labelme_data['imageWidth']
98
+ im['height'] = labelme_data['imageHeight']
99
+
100
+ if validate_image_sizes:
101
+ try:
102
+ pil_im = open_image(image_fn_abs)
103
+ except Exception:
104
+ print('Warning: error opening image {} for size validation, skipping'.format(image_fn_abs))
105
+ result['status'] = 'skipped (size validation error)'
106
+ return result
107
+ if not (im['width'] == pil_im.width and im['height'] == pil_im.height):
108
+ print('Warning: image size validation error for file {}'.format(image_fn_relative))
109
+ im['width'] = pil_im.width
110
+ im['height'] = pil_im.height
111
+ im['labelme_width'] = labelme_data['imageWidth']
112
+ im['labelme_height'] = labelme_data['imageHeight']
113
+
114
+ shapes = labelme_data['shapes']
115
+
116
+ if ('flags' in labelme_data) and (len(labelme_data['flags']) > 0):
117
+ im['flags'] = labelme_data['flags']
118
+
119
+ annotations_this_image = []
120
+
121
+ if len(shapes) == 0:
122
+
123
+ if allow_new_categories:
124
+ category_id = add_category('empty',category_name_to_id)
125
+ else:
126
+ assert 'empty' in category_name_to_id
127
+ category_id = category_name_to_id['empty']
128
+
129
+ ann = {}
130
+ ann['id'] = str(uuid.uuid1())
131
+ ann['image_id'] = im['id']
132
+ ann['category_id'] = category_id
133
+ ann['sequence_level_annotation'] = False
134
+ annotations_this_image.append(ann)
135
+
136
+ else:
137
+
138
+ for shape in shapes:
139
+
140
+ if shape['shape_type'] != 'rectangle':
141
+ print('Only rectangles are supported, skipping an annotation of type {} in {}'.format(
142
+ shape['shape_type'],image_fn_relative))
143
+ continue
144
+
145
+ if use_folders_as_labels:
146
+ category_name = os.path.basename(os.path.dirname(image_fn_abs))
147
+ else:
148
+ category_name = shape['label']
149
+
150
+ if allow_new_categories:
151
+ category_id = add_category(category_name,category_name_to_id)
152
+ else:
153
+ assert category_name in category_name_to_id
154
+ category_id = category_name_to_id[category_name]
155
+
156
+ points = shape['points']
157
+ if len(points) != 2:
158
+ print('Warning: illegal rectangle with {} points for {}'.format(
159
+ len(points),image_fn_relative))
160
+ continue
161
+
162
+ p0 = points[0]
163
+ p1 = points[1]
164
+ x0 = min(p0[0],p1[0])
165
+ x1 = max(p0[0],p1[0])
166
+ y0 = min(p0[1],p1[1])
167
+ y1 = max(p0[1],p1[1])
168
+
169
+ bbox = [x0,y0,abs(x1-x0),abs(y1-y0)]
170
+ ann = {}
171
+ ann['id'] = str(uuid.uuid1())
172
+ ann['image_id'] = im['id']
173
+ ann['category_id'] = category_id
174
+ ann['sequence_level_annotation'] = False
175
+ ann['bbox'] = bbox
176
+ annotations_this_image.append(ann)
177
+
178
+ # ...for each shape
179
+
180
+ result['im'] = im
181
+ result['annotations_this_image'] = annotations_this_image
182
+
183
+ return result
184
+
185
+ # ...def _process_labelme_file(...)
186
+
22
187
 
23
- def labelme_to_coco(input_folder,output_file=None,category_id_to_category_name=None,
24
- empty_category_name='empty',empty_category_id=None,info_struct=None,
25
- relative_paths_to_include=None,relative_paths_to_exclude=None,
26
- use_folders_as_labels=False,recursive=True,no_json_handling='skip',
27
- validate_image_sizes=True):
188
+ #%% Main function
189
+
190
+ def labelme_to_coco(input_folder,
191
+ output_file=None,
192
+ category_id_to_category_name=None,
193
+ empty_category_name='empty',
194
+ empty_category_id=None,
195
+ info_struct=None,
196
+ relative_paths_to_include=None,
197
+ relative_paths_to_exclude=None,
198
+ use_folders_as_labels=False,
199
+ recursive=True,
200
+ no_json_handling='skip',
201
+ validate_image_sizes=True,
202
+ max_workers=1,
203
+ use_threads=True):
28
204
  """
29
205
  Find all images in [input_folder] that have corresponding .json files, and convert
30
206
  to a COCO .json file.
31
207
 
32
- Currently only supports bounding box annotations.
208
+ Currently only supports bounding box annotations and image-level flags (i.e., does not
209
+ support point or general polygon annotations).
210
+
211
+ Labelme's image-level flags don't quite fit the COCO annotations format, so they are attached
212
+ to image objects, rather than annotation objects.
33
213
 
34
214
  If output_file is None, just returns the resulting dict, does not write to file.
35
215
 
@@ -48,32 +228,59 @@ def labelme_to_coco(input_folder,output_file=None,category_id_to_category_name=N
48
228
 
49
229
  * 'skip': ignore image files with no corresponding .json files
50
230
  * 'empty': treat image files with no corresponding .json files as empty
51
- * 'error': throw an error when an image file has no corresponding .json file
231
+ * 'error': throw an error when an image file has no corresponding .json file
52
232
  """
53
233
 
234
+ if max_workers > 1:
235
+ assert category_id_to_category_name is not None, \
236
+ 'When parallelizing labelme --> COCO conversion, you must supply a category mapping'
237
+
54
238
  if category_id_to_category_name is None:
55
239
  category_name_to_id = {}
56
240
  else:
57
241
  category_name_to_id = {v: k for k, v in category_id_to_category_name.items()}
58
-
59
242
  for category_name in category_name_to_id:
60
243
  try:
61
244
  category_name_to_id[category_name] = int(category_name_to_id[category_name])
62
245
  except ValueError:
63
246
  raise ValueError('Category IDs must be ints or string-formatted ints')
247
+
248
+ # If the user supplied an explicit empty category ID, and the empty category
249
+ # name is already in category_name_to_id, make sure they match.
250
+ if empty_category_id is not None:
251
+ if empty_category_name in category_name_to_id:
252
+ assert category_name_to_id[empty_category_name] == empty_category_id, \
253
+ 'Ambiguous empty category specification'
254
+ if empty_category_id in category_id_to_category_name:
255
+ assert category_id_to_category_name[empty_category_id] == empty_category_name, \
256
+ 'Ambiguous empty category specification'
257
+ else:
258
+ if empty_category_name in category_name_to_id:
259
+ empty_category_id = category_name_to_id[empty_category_name]
64
260
 
261
+ del category_id_to_category_name
262
+
65
263
  # Enumerate images
264
+ print('Enumerating images in {}'.format(input_folder))
66
265
  image_filenames_relative = path_utils.find_images(input_folder,recursive=recursive,
67
- return_relative_paths=True)
68
-
69
- def add_category(category_name,candidate_category_id=0):
70
- if category_name in category_name_to_id:
71
- return category_name_to_id[category_name]
72
- while candidate_category_id in category_name_to_id.values():
73
- candidate_category_id += 1
74
- category_name_to_id[category_name] = candidate_category_id
75
- return candidate_category_id
266
+ return_relative_paths=True,
267
+ convert_slashes=True)
268
+
269
+ # Remove any images we're supposed to skip
270
+ if (relative_paths_to_include is not None) or (relative_paths_to_exclude is not None):
271
+ image_filenames_relative_to_process = []
272
+ for image_fn_relative in image_filenames_relative:
273
+ if relative_paths_to_include is not None and image_fn_relative not in relative_paths_to_include:
274
+ continue
275
+ if relative_paths_to_exclude is not None and image_fn_relative in relative_paths_to_exclude:
276
+ continue
277
+ image_filenames_relative_to_process.append(image_fn_relative)
278
+ print('Processing {} of {} images'.format(
279
+ len(image_filenames_relative_to_process),
280
+ len(image_filenames_relative)))
281
+ image_filenames_relative = image_filenames_relative_to_process
76
282
 
283
+ # If the user supplied a category ID to use for empty images...
77
284
  if empty_category_id is not None:
78
285
  try:
79
286
  empty_category_id = int(empty_category_id)
@@ -81,122 +288,52 @@ def labelme_to_coco(input_folder,output_file=None,category_id_to_category_name=N
81
288
  raise ValueError('Category IDs must be ints or string-formatted ints')
82
289
 
83
290
  if empty_category_id is None:
84
- empty_category_id = add_category(empty_category_name)
85
-
86
- images = []
87
- annotations = []
88
-
89
- # image_fn_relative = image_filenames_relative[0]
90
- for image_fn_relative in tqdm(image_filenames_relative):
91
-
92
- if relative_paths_to_include is not None and image_fn_relative not in relative_paths_to_include:
93
- continue
94
- if relative_paths_to_exclude is not None and image_fn_relative in relative_paths_to_exclude:
95
- continue
96
-
97
- image_fn_abs = os.path.join(input_folder,image_fn_relative)
98
- json_fn_abs = os.path.splitext(image_fn_abs)[0] + '.json'
99
-
100
- im = {}
101
- im['id'] = image_fn_relative
102
- im['file_name'] = image_fn_relative
103
-
104
- # If there's no .json file for this image...
105
- if not os.path.isfile(json_fn_abs):
291
+ empty_category_id = add_category(empty_category_name,category_name_to_id)
106
292
 
107
- # Either skip it...
108
- if no_json_handling == 'skip':
109
- continue
293
+ if max_workers <= 1:
294
+
295
+ image_results = []
296
+ for image_fn_relative in tqdm(image_filenames_relative):
110
297
 
111
- # ...or error
112
- elif no_json_handling == 'error':
113
- raise ValueError('Image file {} has no corresponding .json file'.format(
114
- image_fn_relative))
298
+ result = _process_labelme_file(image_fn_relative,input_folder,use_folders_as_labels,
299
+ no_json_handling,validate_image_sizes,
300
+ category_name_to_id,allow_new_categories=True)
301
+ image_results.append(result)
115
302
 
116
- # ...or treat it as empty.
117
- elif no_json_handling == 'empty':
118
- try:
119
- pil_im = open_image(image_fn_abs)
120
- except Exception:
121
- print('Warning: error opening image {}, skipping'.format(image_fn_abs))
122
- continue
123
- im['width'] = pil_im.width
124
- im['height'] = pil_im.height
125
- shapes = []
126
- else:
127
- raise ValueError('Unrecognized specifier {} for handling images with no .json files'.format(
128
- no_json_handling))
303
+ else:
129
304
 
130
- # If we found a .json file for this image...
305
+ n_workers = min(max_workers,len(image_filenames_relative))
306
+ assert category_name_to_id is not None
307
+
308
+ if use_threads:
309
+ pool = ThreadPool(n_workers)
131
310
  else:
132
-
133
- # Read the .json file
134
- with open(json_fn_abs,'r') as f:
135
- labelme_data = json.load(f)
136
- im['width'] = labelme_data['imageWidth']
137
- im['height'] = labelme_data['imageHeight']
138
-
139
- if validate_image_sizes:
140
- try:
141
- pil_im = open_image(image_fn_abs)
142
- except Exception:
143
- print('Warning: error opening image {}, skipping'.format(image_fn_abs))
144
- continue
145
- assert im['width'] == pil_im.width and im['height'] == pil_im.height, \
146
- 'Image size validation error for file {}'.format(image_fn_relative)
147
-
148
- shapes = labelme_data['shapes']
311
+ pool = Pool(n_workers)
149
312
 
150
- if len(shapes) == 0:
151
-
152
- category_id = add_category('empty')
153
- ann = {}
154
- ann['id'] = str(uuid.uuid1())
155
- ann['image_id'] = im['id']
156
- ann['category_id'] = category_id
157
- ann['sequence_level_annotation'] = False
158
- annotations.append(ann)
159
-
313
+ image_results = list(tqdm(pool.imap(
314
+ partial(_process_labelme_file,
315
+ input_folder=input_folder,
316
+ use_folders_as_labels=use_folders_as_labels,
317
+ no_json_handling=no_json_handling,
318
+ validate_image_sizes=validate_image_sizes,
319
+ category_name_to_id=category_name_to_id,
320
+ allow_new_categories=False
321
+ ),image_filenames_relative), total=len(image_filenames_relative)))
322
+
323
+ images = []
324
+ annotations = []
325
+
326
+ # Flatten the lists of images and annotations
327
+ for result in image_results:
328
+ im = result['im']
329
+ annotations_this_image = result['annotations_this_image']
330
+
331
+ if im is None:
332
+ assert annotations_this_image is None
160
333
  else:
334
+ images.append(im)
335
+ annotations.extend(annotations_this_image)
161
336
 
162
- for shape in shapes:
163
- if shape['shape_type'] != 'rectangle':
164
- print('Only rectangles are supported, skipping an annotation of type {} in {}'.format(
165
- shape['shape_type'],image_fn_relative))
166
- continue
167
-
168
- if use_folders_as_labels:
169
- category_name = os.path.basename(os.path.dirname(image_fn_abs))
170
- else:
171
- category_name = shape['label']
172
-
173
- category_id = add_category(category_name)
174
-
175
- points = shape['points']
176
- assert len(points) == 2, 'Illegal rectangle with {} points'.format(
177
- len(points))
178
-
179
- p0 = points[0]
180
- p1 = points[1]
181
- x0 = min(p0[0],p1[0])
182
- x1 = max(p0[0],p1[0])
183
- y0 = min(p0[1],p1[1])
184
- y1 = max(p0[1],p1[1])
185
- bbox = [x0,y0,abs(x1-x0),abs(y1-y0)]
186
- ann = {}
187
- ann['id'] = str(uuid.uuid1())
188
- ann['image_id'] = im['id']
189
- ann['category_id'] = category_id
190
- ann['sequence_level_annotation'] = False
191
- ann['bbox'] = bbox
192
- annotations.append(ann)
193
-
194
- # ...for each shape
195
-
196
- images.append(im)
197
-
198
- # ..for each image
199
-
200
337
  output_dict = {}
201
338
  output_dict['images'] = images
202
339
  output_dict['annotations'] = annotations