megadetector-5.0.7-py3-none-any.whl → megadetector-5.0.8-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- api/batch_processing/data_preparation/manage_local_batch.py +28 -14
- api/batch_processing/postprocessing/combine_api_outputs.py +2 -2
- api/batch_processing/postprocessing/compare_batch_results.py +1 -1
- api/batch_processing/postprocessing/convert_output_format.py +24 -6
- api/batch_processing/postprocessing/load_api_results.py +1 -3
- api/batch_processing/postprocessing/md_to_labelme.py +118 -51
- api/batch_processing/postprocessing/merge_detections.py +30 -5
- api/batch_processing/postprocessing/postprocess_batch_results.py +24 -12
- api/batch_processing/postprocessing/remap_detection_categories.py +163 -0
- api/batch_processing/postprocessing/render_detection_confusion_matrix.py +15 -12
- api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +2 -2
- data_management/cct_json_utils.py +7 -2
- data_management/coco_to_labelme.py +263 -0
- data_management/coco_to_yolo.py +7 -4
- data_management/databases/integrity_check_json_db.py +68 -59
- data_management/databases/subset_json_db.py +1 -1
- data_management/get_image_sizes.py +44 -26
- data_management/importers/animl_results_to_md_results.py +1 -3
- data_management/importers/noaa_seals_2019.py +1 -1
- data_management/labelme_to_coco.py +252 -143
- data_management/labelme_to_yolo.py +95 -52
- data_management/lila/create_lila_blank_set.py +106 -23
- data_management/lila/download_lila_subset.py +133 -65
- data_management/lila/generate_lila_per_image_labels.py +1 -1
- data_management/lila/lila_common.py +8 -38
- data_management/read_exif.py +65 -16
- data_management/remap_coco_categories.py +84 -0
- data_management/resize_coco_dataset.py +3 -22
- data_management/wi_download_csv_to_coco.py +239 -0
- data_management/yolo_to_coco.py +283 -83
- detection/run_detector_batch.py +12 -3
- detection/run_inference_with_yolov5_val.py +10 -3
- detection/run_tiled_inference.py +2 -2
- detection/tf_detector.py +2 -1
- detection/video_utils.py +1 -1
- md_utils/ct_utils.py +22 -3
- md_utils/md_tests.py +11 -2
- md_utils/path_utils.py +206 -32
- md_utils/url_utils.py +66 -1
- md_utils/write_html_image_list.py +12 -3
- md_visualization/visualization_utils.py +363 -72
- md_visualization/visualize_db.py +33 -10
- {megadetector-5.0.7.dist-info → megadetector-5.0.8.dist-info}/METADATA +10 -12
- {megadetector-5.0.7.dist-info → megadetector-5.0.8.dist-info}/RECORD +47 -44
- {megadetector-5.0.7.dist-info → megadetector-5.0.8.dist-info}/WHEEL +1 -1
- md_visualization/visualize_megadb.py +0 -183
- {megadetector-5.0.7.dist-info → megadetector-5.0.8.dist-info}/LICENSE +0 -0
- {megadetector-5.0.7.dist-info → megadetector-5.0.8.dist-info}/top_level.txt +0 -0
data_management/coco_to_yolo.py
CHANGED
```diff
@@ -56,6 +56,10 @@ def write_yolo_dataset_file(yolo_dataset_file,
         class_lines = [s.strip() for s in class_lines]
         class_list = [s for s in class_lines if len(s) > 0]
 
+    if not (yolo_dataset_file.endswith('.yml') or yolo_dataset_file.endswith('.yaml')):
+        print('Warning: writing dataset file to a non-yml/yaml extension:\n{}'.format(
+            yolo_dataset_file))
+
     # Write dataset.yaml
     with open(yolo_dataset_file,'w') as f:
 
@@ -189,7 +193,6 @@ def coco_to_yolo(input_image_folder,output_folder,input_file,
     coco_id_to_name = {}
     yolo_id_to_name = {}
     coco_category_ids_to_exclude = set()
-    category_exclusion_warnings_printed = set()
 
     for category in data['categories']:
         coco_id_to_name[category['id']] = category['name']
@@ -465,9 +468,9 @@ def coco_to_yolo(input_image_folder,output_folder,input_file,
         #
         # https://github.com/ultralytics/yolov5/issues/3218
         #
-        # I think this is also true for images with empty
-        # I'm using the convention suggested on that issue, i.e. hard
-        # are expressed as images without .txt files.
+        # I think this is also true for images with empty .txt files, but
+        # I'm using the convention suggested on that issue, i.e. hard
+        # negatives are expressed as images without .txt files.
         if len(bboxes) > 0:
 
             with open(dest_txt,'w') as f:
```
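Version 5.0.8 adds a gentle guard to `write_yolo_dataset_file`: writing the dataset file to a non-`.yml`/`.yaml` path now prints a warning but still writes the file. A minimal sketch of that behavior; only the first positional argument appears in the hunk header, so the `class_list` keyword here is an assumption:

```python
from data_management.coco_to_yolo import write_yolo_dataset_file

# Hypothetical call: 'class_list' is an assumption, since the full signature
# isn't visible in this hunk. Because 'dataset.txt' doesn't end in .yml/.yaml,
# 5.0.8 prints a warning, then writes the file anyway.
write_yolo_dataset_file('dataset.txt',
                        class_list=['animal', 'person', 'vehicle'])
```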
data_management/databases/integrity_check_json_db.py
CHANGED

```diff
@@ -24,9 +24,9 @@ import sys
 
 from multiprocessing.pool import ThreadPool
 from operator import itemgetter
-from PIL import Image
 from tqdm import tqdm
 
+from md_visualization.visualization_utils import open_image
 from md_utils import ct_utils
 
 
@@ -41,6 +41,8 @@ class IntegrityCheckOptions:
     bRequireLocation = True
     iMaxNumImages = -1
     nThreads = 10
+    verbose = True
+
 
 # This is used in a medium-hacky way to share modified options across threads
 defaultOptions = IntegrityCheckOptions()
@@ -65,7 +67,9 @@ def check_image_existence_and_size(image,options=None):
         print('Missing image size in {}'.format(filePath))
         return False
 
-    width, height = Image.open(filePath).size
+    # width, height = Image.open(filePath).size
+    pil_im = open_image(filePath)
+    width,height = pil_im.size
     if (not (width == image['width'] and height == image['height'])):
         print('Size mismatch for image {}: {} (reported {},{}, actual {},{})'.format(
             image['id'], filePath, image['width'], image['height'], width, height))
@@ -86,8 +90,9 @@ def integrity_check_json_db(jsonFile, options=None):
 
     if options.bCheckImageSizes:
         options.bCheckImageExistence = True
-
-
+
+    if options.verbose:
+        print(options.__dict__)
 
     if options.baseDir is None:
         options.baseDir = ''
@@ -105,8 +110,9 @@ def integrity_check_json_db(jsonFile, options=None):
 
     assert os.path.isfile(jsonFile), '.json file {} does not exist'.format(jsonFile)
 
-
-
+    if options.verbose:
+        print('Reading .json {} with base dir [{}]...'.format(
+            jsonFile,baseDir))
 
     with open(jsonFile,'r') as f:
         data = json.load(f)
@@ -133,7 +139,8 @@ def integrity_check_json_db(jsonFile, options=None):
     catNameToCat = {}
     imageLocationSet = set()
 
-
+    if options.verbose:
+        print('Checking categories...')
 
     for cat in tqdm(categories):
 
@@ -157,11 +164,13 @@ def integrity_check_json_db(jsonFile, options=None):
 
     # ...for each category
 
-
+    if options.verbose:
+        print('\nChecking images...')
 
     if options.iMaxNumImages > 0 and len(images) > options.iMaxNumImages:
 
-
+        if options.verbose:
+            print('Trimming image list to {}'.format(options.iMaxNumImages))
         images = images[0:options.iMaxNumImages]
 
     imagePathsInJson = set()
@@ -217,7 +226,8 @@ def integrity_check_json_db(jsonFile, options=None):
     # Are we checking for unused images?
     if (len(baseDir) > 0) and options.bFindUnusedImages:
 
-
+        if options.verbose:
+            print('\nEnumerating images...')
 
         # Recursively enumerate images
         imagePaths = []
@@ -244,8 +254,9 @@ def integrity_check_json_db(jsonFile, options=None):
 
     if len(baseDir) == 0:
         print('Warning: checking image sizes without a base directory, assuming "."')
-
-
+
+    if options.verbose:
+        print('Checking image existence and/or image sizes...')
 
     if options.nThreads is not None and options.nThreads > 1:
         pool = ThreadPool(options.nThreads)
@@ -265,9 +276,9 @@ def integrity_check_json_db(jsonFile, options=None):
 
     # ...for each image
 
-
-
-
+    if options.verbose:
+        print('{} validation errors (of {})'.format(len(validationErrors),len(images)))
+        print('Checking annotations...')
 
     nBoxes = 0
 
@@ -302,58 +313,56 @@ def integrity_check_json_db(jsonFile, options=None):
         catIdToCat[ann['category_id']]['_count'] +=1
 
    # ...for each annotation
-
-
-    ##%% Print statistics
 
-
-    nUnannotated = 0
-    nMultiAnnotated = 0
+    sortedCategories = sorted(categories, key=itemgetter('_count'), reverse=True)
 
-    for image in images:
-        if image['_count'] == 0:
-            nUnannotated += 1
-        elif image['_count'] > 1:
-            nMultiAnnotated += 1
-
-    print('Found {} unannotated images, {} images with multiple annotations'.format(
-        nUnannotated,nMultiAnnotated))
 
-
-    print('Found {} unused image files'.format(len(unusedFiles)))
-
-    nUnusedCategories = 0
+    ##%% Print statistics
 
-
-    for cat in categories:
-        if cat['_count'] == 0:
-            print('Unused category: {}'.format(cat['name']))
-            nUnusedCategories += 1
+    if options.verbose:
 
-
+        # Find un-annotated images and multi-annotation images
+        nUnannotated = 0
+        nMultiAnnotated = 0
+
+        for image in images:
+            if image['_count'] == 0:
+                nUnannotated += 1
+            elif image['_count'] > 1:
+                nMultiAnnotated += 1
+
+        print('Found {} unannotated images, {} images with multiple annotations'.format(
+            nUnannotated,nMultiAnnotated))
+
+        if (len(baseDir) > 0) and options.bFindUnusedImages:
+            print('Found {} unused image files'.format(len(unusedFiles)))
 
-
-    if len(sequences) > 0:
-        sequenceString = '{} sequences'.format(len(sequences))
+        nUnusedCategories = 0
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    for cat in sortedCategories:
-        print('{:6} {}'.format(cat['_count'],cat['name']))
+        # Find unused categories
+        for cat in categories:
+            if cat['_count'] == 0:
+                print('Unused category: {}'.format(cat['name']))
+                nUnusedCategories += 1
+
+        print('Found {} unused categories'.format(nUnusedCategories))
+
+        sequenceString = 'no sequence info'
+        if len(sequences) > 0:
+            sequenceString = '{} sequences'.format(len(sequences))
+
+        print('\nDB contains {} images, {} annotations, {} bboxes, {} categories, {}\n'.format(
+            len(images),len(annotations),nBoxes,len(categories),sequenceString))
 
-
+        if len(imageLocationSet) > 0:
+            print('DB contains images from {} locations\n'.format(len(imageLocationSet)))
+
+        print('Categories and annotation (not image) counts:\n')
+
+        for cat in sortedCategories:
+            print('{:6} {}'.format(cat['_count'],cat['name']))
+
+        print('')
 
     errorInfo = {}
     errorInfo['unusedFiles'] = unusedFiles
```
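The recurring change in this file is the new `verbose` option (default `True`), which gates all of the progress and statistics printouts so the checker can run quietly in scripts. A minimal usage sketch; the paths are placeholders, and the return value is left unpacked since its exact shape isn't shown in this diff:

```python
from data_management.databases.integrity_check_json_db import (
    IntegrityCheckOptions, integrity_check_json_db)

options = IntegrityCheckOptions()
options.baseDir = '/data/camera-traps'   # placeholder path
options.bCheckImageSizes = True
options.verbose = False  # new in 5.0.8: suppress progress/statistics printouts

# An errorInfo dict is built at the end of the function (visible in the diff);
# the full return signature isn't shown here, so the result is not unpacked.
_ = integrity_check_json_db('/data/camera-traps/dataset.json', options)
```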
data_management/databases/subset_json_db.py
CHANGED

```diff
@@ -65,7 +65,7 @@ def subset_json_db(input_json, query, output_json=None, ignore_case=False):
     # Write the output file if requested
     if output_json is not None:
         print('Writing output .json...')
-        json.dump(output_data,open(output_json,'w'),indent=
+        json.dump(output_data,open(output_json,'w'),indent=1)
 
     return output_data
 
```
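For reference, a sketch of calling `subset_json_db` with the signature shown in the hunk header; the filenames are placeholders, and the query-matching semantics are whatever the function's docstring specifies:

```python
from data_management.databases.subset_json_db import subset_json_db

# Keep only entries matching 'station_01' (filenames are placeholders);
# as of this fix, the output .json is written with indent=1.
subset = subset_json_db(input_json='all_results.json',
                        query='station_01',
                        output_json='station_01_results.json',
                        ignore_case=True)
```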
data_management/get_image_sizes.py
CHANGED

```diff
@@ -15,6 +15,8 @@ import os
 from PIL import Image
 import sys
 
+from md_utils.path_utils import find_images
+
 from multiprocessing.pool import ThreadPool
 from multiprocessing.pool import Pool
 from functools import partial
@@ -27,7 +29,11 @@ use_threads = False
 
 #%% Processing functions
 
-def process_image(image_path,image_prefix=None):
+def _get_image_size(image_path,image_prefix=None):
+    """
+    Support function to get the size of a single image. Returns a (path,w,h) tuple.
+    w and h will be -1 if the image fails to load.
+    """
 
     if image_prefix is not None:
         full_path = os.path.join(image_prefix,image_path)
@@ -49,43 +55,56 @@ def process_image(image_path,image_prefix=None):
         return (image_path,-1,-1)
 
 
-def
+def get_image_sizes(filenames,image_prefix=None,output_file=None,
+                    n_workers=default_n_threads,use_threads=True,
+                    recursive=True):
+    """
+    Get the width and height of all images in [filenames], which can be:
+
+    * A .json-formatted file
+    * A folder
+    * A list of files
+
+    ...returning a list of (path,w,h) tuples, and optionally writing the results to [output_file].
+    """
+
+    if output_file is not None:
+        assert os.path.isdir(os.path.dirname(output_file)), \
+            'Illegal output file {}, parent folder does not exist'.format(output_file)
+
+    if isinstance(filenames,str) and os.path.isfile(filenames):
+        with open(filenames,'r') as f:
+            filenames = json.load(f)
+        filenames = [s.strip() for s in filenames]
+    elif isinstance(filenames,str) and os.path.isdir(filenames):
+        filenames = find_images(filenames,recursive=recursive,
+                                return_relative_paths=False,convert_slashes=True)
+    else:
+        assert isinstance(filenames,list)
 
-    if
+    if n_workers <= 1:
 
         all_results = []
         for i_file,fn in tqdm(enumerate(filenames),total=len(filenames)):
-            all_results.append(
+            all_results.append(_get_image_size(fn,image_prefix=image_prefix))
 
     else:
 
-        print('Creating a pool with {}
+        print('Creating a pool with {} workers'.format(n_workers))
         if use_threads:
-            pool = ThreadPool(
+            pool = ThreadPool(n_workers)
         else:
-            pool = Pool(
+            pool = Pool(n_workers)
         # all_results = list(tqdm(pool.imap(process_image, filenames), total=len(filenames)))
         all_results = list(tqdm(pool.imap(
-            partial(
-
-    return all_results
-
-
-def process_list_file(input_file,output_file=None,image_prefix=None,n_threads=default_n_threads):
-
-    assert os.path.isdir(os.path.dirname(output_file))
-    assert os.path.isfile(input_file)
-
-    with open(input_file,'r') as f:
-        filenames = json.load(f)
-    filenames = [s.strip() for s in filenames]
-
-    all_results = process_images(filenames,image_prefix=image_prefix,n_threads=n_threads)
+            partial(_get_image_size,image_prefix=image_prefix), filenames), total=len(filenames)))
 
     if output_file is not None:
         with open(output_file,'w') as f:
             json.dump(all_results,f,indent=1)
-
+
+    return all_results
+
 
 #%% Interactive driver
 
@@ -116,8 +135,7 @@ if False:
 
     #%%
 
-
-    process_list_file(relative_image_list_file,image_size_file,image_prefix=base_dir,n_threads=4)
+    get_image_sizes(relative_image_list_file,image_size_file,image_prefix=base_dir,n_threads=4)
 
 
 #%% Command-line driver
@@ -136,7 +154,7 @@ def main():
 
     args = parser.parse_args()
 
-
+    _ = get_image_sizes(args.input_file,args.output_file,args.image_prefix,args.n_threads)
 
 
 if __name__ == '__main__':
```
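The net effect of this refactor is a single public entry point, `get_image_sizes`, replacing `process_image`/`process_images`/`process_list_file`, and it now accepts a folder (enumerated via `find_images`) in addition to a .json list file or a Python list. A sketch against the new signature, with placeholder paths:

```python
from data_management.get_image_sizes import get_image_sizes

# Point it at a folder: images are enumerated recursively and sized in
# parallel. Each result is a (path, w, h) tuple, with w and h set to -1
# for images that fail to load.
sizes = get_image_sizes('/data/camera-traps/images',
                        output_file='/tmp/image_sizes.json',
                        n_workers=8,
                        use_threads=True)

for path, w, h in sizes[:5]:
    print('{}: {} x {}'.format(path, w, h))
```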
data_management/importers/animl_results_to_md_results.py
CHANGED

```diff
@@ -39,8 +39,6 @@ def animl_results_to_md_results(input_file,output_file=None):
     If [output_file] is None, '.json' will be appended to the input file.
     """
 
-    #%%
-
     if output_file is None:
         output_file = input_file + '.json'
 
@@ -111,7 +109,7 @@ def animl_results_to_md_results(input_file,output_file=None):
     with open(output_file,'w') as f:
         json.dump(results,f,indent=1)
 
-# ...
+# ...animl_results_to_md_results(...)
 
 
 #%% Interactive driver
```
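For context, a minimal sketch of invoking this function; the input filename is a placeholder, since the diff doesn't show the expected animl input format:

```python
from data_management.importers.animl_results_to_md_results import \
    animl_results_to_md_results

# 'animl_results.csv' is a placeholder; with output_file=None, the output
# is written to 'animl_results.csv.json', per the docstring in the diff.
animl_results_to_md_results('animl_results.csv', output_file=None)
```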