megadetector 5.0.25__py3-none-any.whl → 5.0.27__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of megadetector might be problematic. Click here for more details.
- megadetector/data_management/cct_json_utils.py +15 -2
- megadetector/data_management/coco_to_yolo.py +53 -31
- megadetector/data_management/databases/combine_coco_camera_traps_files.py +7 -3
- megadetector/data_management/databases/integrity_check_json_db.py +2 -2
- megadetector/data_management/lila/generate_lila_per_image_labels.py +2 -2
- megadetector/data_management/lila/test_lila_metadata_urls.py +21 -10
- megadetector/data_management/remap_coco_categories.py +60 -11
- megadetector/data_management/yolo_to_coco.py +45 -15
- megadetector/postprocessing/classification_postprocessing.py +788 -524
- megadetector/postprocessing/create_crop_folder.py +95 -33
- megadetector/postprocessing/load_api_results.py +4 -1
- megadetector/postprocessing/md_to_coco.py +1 -1
- megadetector/postprocessing/postprocess_batch_results.py +156 -42
- megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +3 -8
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +2 -2
- megadetector/postprocessing/separate_detections_into_folders.py +20 -4
- megadetector/postprocessing/subset_json_detector_output.py +180 -15
- megadetector/postprocessing/validate_batch_results.py +13 -5
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +6 -6
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +3 -58
- megadetector/taxonomy_mapping/species_lookup.py +45 -2
- megadetector/utils/ct_utils.py +4 -2
- megadetector/utils/directory_listing.py +1 -1
- megadetector/utils/md_tests.py +2 -1
- megadetector/utils/path_utils.py +308 -19
- megadetector/utils/wi_utils.py +363 -186
- megadetector/visualization/visualization_utils.py +2 -1
- megadetector/visualization/visualize_db.py +1 -1
- megadetector/visualization/visualize_detector_output.py +1 -4
- {megadetector-5.0.25.dist-info → megadetector-5.0.27.dist-info}/METADATA +4 -3
- {megadetector-5.0.25.dist-info → megadetector-5.0.27.dist-info}/RECORD +34 -34
- {megadetector-5.0.25.dist-info → megadetector-5.0.27.dist-info}/WHEEL +1 -1
- {megadetector-5.0.25.dist-info → megadetector-5.0.27.dist-info/licenses}/LICENSE +0 -0
- {megadetector-5.0.25.dist-info → megadetector-5.0.27.dist-info}/top_level.txt +0 -0
|
@@ -300,7 +300,10 @@ class SequenceOptions:
|
|
|
300
300
|
def __init__(self):
|
|
301
301
|
#: Images separated by <= this duration will be grouped into the same sequence.
|
|
302
302
|
self.episode_interval_seconds = 60.0
|
|
303
|
-
|
|
303
|
+
|
|
304
|
+
#: How to handle invalid datetimes: 'error' or 'none'
|
|
305
|
+
self.datetime_conversion_failure_behavior = 'none'
|
|
306
|
+
|
|
304
307
|
|
|
305
308
|
#%% Functions
|
|
306
309
|
|
|
@@ -445,7 +448,17 @@ def create_sequences(image_info,options=None):
|
|
|
445
448
|
raise ValueError('Unrecognized type for [image_info]')
|
|
446
449
|
|
|
447
450
|
# Modifies the images in place
|
|
448
|
-
_ = parse_datetimes_from_cct_image_list(image_info
|
|
451
|
+
_ = parse_datetimes_from_cct_image_list(image_info,
|
|
452
|
+
conversion_failure_behavior=options.datetime_conversion_failure_behavior)
|
|
453
|
+
|
|
454
|
+
n_invalid_datetimes = 0
|
|
455
|
+
for im in image_info:
|
|
456
|
+
if not isinstance(im['datetime'],datetime.datetime):
|
|
457
|
+
assert im['datetime'] is None, 'At this point, datetimes should be valid or None'
|
|
458
|
+
n_invalid_datetimes += 1
|
|
459
|
+
if n_invalid_datetimes > 0:
|
|
460
|
+
print('Warning: {} of {} images have invalid datetimes'.format(
|
|
461
|
+
n_invalid_datetimes,len(image_info)))
|
|
449
462
|
|
|
450
463
|
# Find all unique locations
|
|
451
464
|
locations = set()
|
|
@@ -47,6 +47,9 @@ def write_yolo_dataset_file(yolo_dataset_file,
|
|
|
47
47
|
class_list (list or str): an ordered list of class names (the first item will be class 0,
|
|
48
48
|
etc.), or the name of a text file containing an ordered list of class names (one per
|
|
49
49
|
line, starting from class zero).
|
|
50
|
+
train_folder_relative (str, optional): train folder name, used only to populate dataset.yaml
|
|
51
|
+
val_folder_relative (str, optional): val folder name, used only to populate dataset.yaml
|
|
52
|
+
test_folder_relative (str, optional): test folder name, used only to populate dataset.yaml
|
|
50
53
|
"""
|
|
51
54
|
|
|
52
55
|
# Read class names
|
|
@@ -97,7 +100,7 @@ def coco_to_yolo(input_image_folder,
|
|
|
97
100
|
category_names_to_exclude=None,
|
|
98
101
|
category_names_to_include=None,
|
|
99
102
|
write_output=True,
|
|
100
|
-
flatten_paths=
|
|
103
|
+
flatten_paths=False):
|
|
101
104
|
"""
|
|
102
105
|
Converts a COCO-formatted dataset to a YOLO-formatted dataset, optionally flattening the
|
|
103
106
|
dataset to a single folder in the process.
|
|
@@ -116,17 +119,21 @@ def coco_to_yolo(input_image_folder,
|
|
|
116
119
|
images are left alone.
|
|
117
120
|
source_format (str, optional): can be 'coco' (default) or 'coco_camera_traps'. The only difference
|
|
118
121
|
is that when source_format is 'coco_camera_traps', we treat an image with a non-bbox
|
|
119
|
-
annotation
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
122
|
+
annotation as a special case, i.e. that's how an empty image is indicated. The original
|
|
123
|
+
COCO standard is a little ambiguous on this issue. If source_format is 'coco', we
|
|
124
|
+
either treat images as empty or error, depending on the value of [allow_empty_annotations].
|
|
125
|
+
[allow_empty_annotations] has no effect if source_format is 'coco_camera_traps'.
|
|
126
|
+
overwrite_images (bool, optional): over-write images in the output folder if they exist
|
|
124
127
|
create_image_and_label_folders (bool, optional): whether to create separate folders called 'images' and
|
|
125
128
|
'labels' in the YOLO output folder. If create_image_and_label_folders is False,
|
|
126
129
|
a/b/c/image001.jpg will become a#b#c#image001.jpg, and the corresponding text file will
|
|
127
130
|
be a#b#c#image001.txt. If create_image_and_label_folders is True, a/b/c/image001.jpg will become
|
|
128
131
|
images/a#b#c#image001.jpg, and the corresponding text file will be
|
|
129
132
|
labels/a#b#c#image001.txt.
|
|
133
|
+
class_file_name (str, optional): .txt file (relative to the output folder) that we should
|
|
134
|
+
populate with a list of classes (or None to omit)
|
|
135
|
+
allow_empty_annotations (bool, optional): if this is False and [source_format] is 'coco',
|
|
136
|
+
we'll error on annotations that have no 'bbox' field
|
|
130
137
|
clip_boxes (bool, optional): whether to clip bounding box coordinates to the range [0,1] before
|
|
131
138
|
converting to YOLO xywh format
|
|
132
139
|
image_id_to_output_image_json_file (str, optional): an optional *output* file, to which we will write
|
|
@@ -139,12 +146,14 @@ def coco_to_yolo(input_image_folder,
|
|
|
139
146
|
category_names_to_exclude (str, optional): category names that should not be represented in the
|
|
140
147
|
YOLO output; only impacts annotations, does not prevent copying images. There's almost no reason
|
|
141
148
|
you would want to specify this and [category_names_to_include].
|
|
142
|
-
category_names_to_include (str, optional): allow-list of category names that should be represented
|
|
143
|
-
YOLO output; only impacts annotations, does not prevent copying images. There's almost
|
|
144
|
-
you would want to specify this and [category_names_to_exclude].
|
|
149
|
+
category_names_to_include (str, optional): allow-list of category names that should be represented
|
|
150
|
+
in the YOLO output; only impacts annotations, does not prevent copying images. There's almost
|
|
151
|
+
no reason you would want to specify this and [category_names_to_exclude].
|
|
145
152
|
write_output (bool, optional): determines whether we actually copy images and write annotations;
|
|
146
153
|
setting this to False mostly puts this function in "dry run" mode. The class list
|
|
147
154
|
file is written regardless of the value of write_output.
|
|
155
|
+
flatten_paths (bool, optional): replace /'s in image filenames with [path_replacement_char],
|
|
156
|
+
which ensures that the output folder is a single flat folder.
|
|
148
157
|
|
|
149
158
|
Returns:
|
|
150
159
|
dict: information about the coco --> yolo mapping, containing at least the fields:
|
|
@@ -313,9 +322,9 @@ def coco_to_yolo(input_image_folder,
|
|
|
313
322
|
|
|
314
323
|
elif source_format == 'coco_camera_traps':
|
|
315
324
|
|
|
316
|
-
# We allow empty bbox lists in COCO camera traps; this is typically a
|
|
317
|
-
# example in a dataset that has bounding boxes, and 0 is typically
|
|
318
|
-
# category.
|
|
325
|
+
# We allow empty bbox lists in COCO camera traps files; this is typically a
|
|
326
|
+
# negative example in a dataset that has bounding boxes, and 0 is typically
|
|
327
|
+
# the empty category.
|
|
319
328
|
if ann['category_id'] != 0:
|
|
320
329
|
if not printed_empty_annotation_warning:
|
|
321
330
|
printed_empty_annotation_warning = True
|
|
@@ -429,13 +438,14 @@ def coco_to_yolo(input_image_folder,
|
|
|
429
438
|
|
|
430
439
|
print('Generating class list')
|
|
431
440
|
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
441
|
+
if class_file_name is not None:
|
|
442
|
+
class_list_filename = os.path.join(output_folder,class_file_name)
|
|
443
|
+
with open(class_list_filename, 'w') as f:
|
|
444
|
+
print('Writing class list to {}'.format(class_list_filename))
|
|
445
|
+
for i_class in range(0,len(yolo_id_to_name)):
|
|
446
|
+
# Category IDs should range from 0..N-1
|
|
447
|
+
assert i_class in yolo_id_to_name
|
|
448
|
+
f.write(yolo_id_to_name[i_class] + '\n')
|
|
439
449
|
|
|
440
450
|
if image_id_to_output_image_json_file is not None:
|
|
441
451
|
print('Writing image ID mapping to {}'.format(image_id_to_output_image_json_file))
|
|
@@ -457,6 +467,9 @@ def coco_to_yolo(input_image_folder,
|
|
|
457
467
|
|
|
458
468
|
source_image_to_dest_image = {}
|
|
459
469
|
|
|
470
|
+
label_files_written = []
|
|
471
|
+
n_boxes_written = 0
|
|
472
|
+
|
|
460
473
|
# TODO: parallelize this loop
|
|
461
474
|
#
|
|
462
475
|
# output_info = images_to_copy[0]
|
|
@@ -471,6 +484,7 @@ def coco_to_yolo(input_image_folder,
|
|
|
471
484
|
|
|
472
485
|
source_image_to_dest_image[source_image] = dest_image
|
|
473
486
|
|
|
487
|
+
# Copy the image if necessary
|
|
474
488
|
if write_output:
|
|
475
489
|
|
|
476
490
|
os.makedirs(os.path.dirname(dest_image),exist_ok=True)
|
|
@@ -482,17 +496,24 @@ def coco_to_yolo(input_image_folder,
|
|
|
482
496
|
if (not os.path.isfile(dest_image)) or (overwrite_images):
|
|
483
497
|
shutil.copyfile(source_image,dest_image)
|
|
484
498
|
|
|
485
|
-
|
|
499
|
+
bboxes = output_info['bboxes']
|
|
500
|
+
|
|
501
|
+
# Write the annotation file if necessary
|
|
502
|
+
#
|
|
503
|
+
# Only write an annotation file if there are bounding boxes. Images with
|
|
504
|
+
# no .txt files are treated as hard negatives, at least by YOLOv5:
|
|
505
|
+
#
|
|
506
|
+
# https://github.com/ultralytics/yolov5/issues/3218
|
|
507
|
+
#
|
|
508
|
+
# I think this is also true for images with empty .txt files, but
|
|
509
|
+
# I'm using the convention suggested on that issue, i.e. hard
|
|
510
|
+
# negatives are expressed as images without .txt files.
|
|
511
|
+
if len(bboxes) > 0:
|
|
486
512
|
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
#
|
|
492
|
-
# I think this is also true for images with empty .txt files, but
|
|
493
|
-
# I'm using the convention suggested on that issue, i.e. hard
|
|
494
|
-
# negatives are expressed as images without .txt files.
|
|
495
|
-
if len(bboxes) > 0:
|
|
513
|
+
n_boxes_written += len(bboxes)
|
|
514
|
+
label_files_written.append(dest_txt)
|
|
515
|
+
|
|
516
|
+
if write_output:
|
|
496
517
|
|
|
497
518
|
with open(dest_txt,'w') as f:
|
|
498
519
|
|
|
@@ -501,8 +522,7 @@ def coco_to_yolo(input_image_folder,
|
|
|
501
522
|
assert len(bbox) == 5
|
|
502
523
|
s = '{} {} {} {} {}'.format(bbox[0],bbox[1],bbox[2],bbox[3],bbox[4])
|
|
503
524
|
f.write(s + '\n')
|
|
504
|
-
|
|
505
|
-
# ...if we're actually writing output
|
|
525
|
+
|
|
506
526
|
|
|
507
527
|
# ...for each image
|
|
508
528
|
|
|
@@ -510,6 +530,8 @@ def coco_to_yolo(input_image_folder,
|
|
|
510
530
|
coco_to_yolo_info['class_list_filename'] = class_list_filename
|
|
511
531
|
coco_to_yolo_info['source_image_to_dest_image'] = source_image_to_dest_image
|
|
512
532
|
coco_to_yolo_info['coco_id_to_yolo_id'] = coco_id_to_yolo_id
|
|
533
|
+
coco_to_yolo_info['label_files_written'] = label_files_written
|
|
534
|
+
coco_to_yolo_info['n_boxes_written'] = n_boxes_written
|
|
513
535
|
|
|
514
536
|
return coco_to_yolo_info
|
|
515
537
|
|
|
@@ -24,8 +24,10 @@ import sys
|
|
|
24
24
|
|
|
25
25
|
#%% Merge functions
|
|
26
26
|
|
|
27
|
-
def combine_cct_files(input_files,
|
|
28
|
-
|
|
27
|
+
def combine_cct_files(input_files,
|
|
28
|
+
output_file=None,
|
|
29
|
+
require_uniqueness=True,
|
|
30
|
+
filename_prefixes=None):
|
|
29
31
|
"""
|
|
30
32
|
Merges the list of COCO Camera Traps files [input_files] into a single
|
|
31
33
|
dictionary, optionally writing the result to [output_file].
|
|
@@ -33,8 +35,10 @@ def combine_cct_files(input_files, output_file=None, require_uniqueness=True,
|
|
|
33
35
|
Args:
|
|
34
36
|
input_files (list): paths to CCT .json files
|
|
35
37
|
output_file (str, optional): path to write merged .json file
|
|
36
|
-
require_uniqueness (bool): whether to require that the images in
|
|
38
|
+
require_uniqueness (bool, optional): whether to require that the images in
|
|
37
39
|
each input_dict be unique
|
|
40
|
+
filename_prefixes (dict, optional): dict mapping input filenames to strings
|
|
41
|
+
that should be prepended to image filenames from that source
|
|
38
42
|
|
|
39
43
|
Returns:
|
|
40
44
|
dict: the merged COCO-formatted .json dict
|
|
@@ -327,7 +327,7 @@ def integrity_check_json_db(jsonFile, options=None):
|
|
|
327
327
|
|
|
328
328
|
for i_image,result in enumerate(results):
|
|
329
329
|
if result is not None:
|
|
330
|
-
validation_errors.append(images[i_image]['file_name'],result)
|
|
330
|
+
validation_errors.append((images[i_image]['file_name'],result))
|
|
331
331
|
|
|
332
332
|
# ...for each image
|
|
333
333
|
|
|
@@ -393,7 +393,7 @@ def integrity_check_json_db(jsonFile, options=None):
|
|
|
393
393
|
elif image['_count'] > 1:
|
|
394
394
|
nMultiAnnotated += 1
|
|
395
395
|
|
|
396
|
-
print('
|
|
396
|
+
print('\nFound {} unannotated images, {} images with multiple annotations'.format(
|
|
397
397
|
nUnannotated,nMultiAnnotated))
|
|
398
398
|
|
|
399
399
|
if (len(base_dir) > 0) and options.bFindUnusedImages:
|
|
@@ -349,7 +349,7 @@ with open(output_file,'w',encoding='utf-8',newline='') as f:
|
|
|
349
349
|
|
|
350
350
|
# ...with open()
|
|
351
351
|
|
|
352
|
-
print('
|
|
352
|
+
print('\nProcessed {} datasets'.format(len(metadata_table)))
|
|
353
353
|
|
|
354
354
|
|
|
355
355
|
#%% Read the .csv back
|
|
@@ -393,7 +393,7 @@ def check_row(row):
|
|
|
393
393
|
dataset_name_to_locations[ds_name].add(row['location_id'])
|
|
394
394
|
|
|
395
395
|
# Faster, but more annoying to debug
|
|
396
|
-
if
|
|
396
|
+
if True:
|
|
397
397
|
|
|
398
398
|
df.progress_apply(check_row, axis=1)
|
|
399
399
|
|
|
@@ -31,9 +31,10 @@ os.makedirs(metadata_dir,exist_ok=True)
|
|
|
31
31
|
md_results_dir = os.path.join(lila_local_base,'md_results')
|
|
32
32
|
os.makedirs(md_results_dir,exist_ok=True)
|
|
33
33
|
|
|
34
|
-
md_results_keys = ['mdv4_results_raw','mdv5a_results_raw','mdv5b_results_raw',
|
|
34
|
+
md_results_keys = ['mdv4_results_raw','mdv5a_results_raw','mdv5b_results_raw',
|
|
35
|
+
'md1000-redwood_results_raw','md_results_with_rde']
|
|
35
36
|
|
|
36
|
-
preferred_cloud = 'gcp' # 'azure', 'aws'
|
|
37
|
+
preferred_cloud = None # 'gcp' # 'azure', 'aws'
|
|
37
38
|
|
|
38
39
|
force_download = True
|
|
39
40
|
|
|
@@ -52,7 +53,7 @@ print('Loaded metadata URLs for {} datasets'.format(len(metadata_table)))
|
|
|
52
53
|
|
|
53
54
|
#%% Download and extract metadata and MD results for each dataset
|
|
54
55
|
|
|
55
|
-
# Takes ~60 seconds if everything needs to
|
|
56
|
+
# Takes ~60 seconds if everything needs to be downloaded and unzipped
|
|
56
57
|
|
|
57
58
|
for ds_name in metadata_table.keys():
|
|
58
59
|
|
|
@@ -88,6 +89,8 @@ url_to_source = {}
|
|
|
88
89
|
# so we pick a semi-arbitrary image that isn't the first. How about the 2000th?
|
|
89
90
|
image_index = 2000
|
|
90
91
|
|
|
92
|
+
# TODO: parallelize this loop
|
|
93
|
+
#
|
|
91
94
|
# ds_name = list(metadata_table.keys())[0]
|
|
92
95
|
for ds_name in metadata_table.keys():
|
|
93
96
|
|
|
@@ -101,13 +104,21 @@ for ds_name in metadata_table.keys():
|
|
|
101
104
|
with open(json_filename, 'r') as f:
|
|
102
105
|
data = json.load(f)
|
|
103
106
|
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
107
|
+
if preferred_cloud is not None:
|
|
108
|
+
clouds = [preferred_cloud]
|
|
109
|
+
else:
|
|
110
|
+
clouds = ['gcp','aws','azure']
|
|
111
|
+
|
|
112
|
+
for cloud in clouds:
|
|
113
|
+
|
|
114
|
+
image_base_url = metadata_table[ds_name]['image_base_url_' + cloud]
|
|
115
|
+
assert not image_base_url.endswith('/')
|
|
116
|
+
|
|
117
|
+
# Download a test image
|
|
118
|
+
test_image_relative_path = data['images'][image_index]['file_name']
|
|
119
|
+
test_image_url = image_base_url + '/' + test_image_relative_path
|
|
120
|
+
|
|
121
|
+
url_to_source[test_image_url] = ds_name + ' metadata ({})'.format(cloud)
|
|
111
122
|
|
|
112
123
|
# Grab an image from the MegaDetector results
|
|
113
124
|
|
|
@@ -12,6 +12,7 @@ import os
|
|
|
12
12
|
import json
|
|
13
13
|
|
|
14
14
|
from copy import deepcopy
|
|
15
|
+
from megadetector.utils.ct_utils import invert_dictionary
|
|
15
16
|
|
|
16
17
|
|
|
17
18
|
#%% Main function
|
|
@@ -19,17 +20,27 @@ from copy import deepcopy
|
|
|
19
20
|
def remap_coco_categories(input_data,
|
|
20
21
|
output_category_name_to_id,
|
|
21
22
|
input_category_name_to_output_category_name,
|
|
22
|
-
output_file=None
|
|
23
|
+
output_file=None,
|
|
24
|
+
allow_unused_categories=False):
|
|
23
25
|
"""
|
|
24
26
|
Given a COCO-formatted dataset, remap the categories to a new categories mapping, optionally
|
|
25
27
|
writing the results to a new file.
|
|
26
28
|
|
|
27
|
-
|
|
29
|
+
Args:
|
|
30
|
+
input_data (str or dict): a COCO-formatted dict or a filename. If it's a dict, it will
|
|
31
|
+
be copied, not modified in place.
|
|
32
|
+
output_category_name_to_id (dict): a dict mapping strings to ints. Categories not in
|
|
33
|
+
this dict will be ignored or will result in errors, depending on allow_unused_categories.
|
|
34
|
+
input_category_name_to_output_category_name (dict): a dict mapping strings to strings.
|
|
35
|
+
Annotations using categories not in this dict will be omitted or will result in
|
|
36
|
+
errors, depending on allow_unused_categories.
|
|
37
|
+
output_file (str, optional): output file to which we should write remapped COCO data
|
|
38
|
+
allow_unused_categories (bool, optional): should we ignore categories not present in the
|
|
39
|
+
input/output mappings? If this is False and we encounter an unmapped category, we'll
|
|
40
|
+
error.
|
|
28
41
|
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
[input_data] can be a COCO-formatted dict or a filename. If it's a dict, it will be copied,
|
|
32
|
-
not modified in place.
|
|
42
|
+
Returns:
|
|
43
|
+
dict: COCO-formatted dict
|
|
33
44
|
"""
|
|
34
45
|
|
|
35
46
|
if isinstance(input_data,str):
|
|
@@ -48,23 +59,59 @@ def remap_coco_categories(input_data,
|
|
|
48
59
|
input_category_name_to_input_category_id = {}
|
|
49
60
|
for c in input_data['categories']:
|
|
50
61
|
input_category_name_to_input_category_id[c['name']] = c['id']
|
|
51
|
-
|
|
62
|
+
input_category_id_to_input_category_name = \
|
|
63
|
+
invert_dictionary(input_category_name_to_input_category_id)
|
|
64
|
+
|
|
52
65
|
# Map input IDs --> output IDs
|
|
53
66
|
input_category_id_to_output_category_id = {}
|
|
54
|
-
|
|
67
|
+
input_category_names = list(input_category_name_to_output_category_name.keys())
|
|
68
|
+
|
|
69
|
+
# input_name = input_category_names[0]
|
|
70
|
+
for input_name in input_category_names:
|
|
71
|
+
|
|
55
72
|
output_name = input_category_name_to_output_category_name[input_name]
|
|
56
73
|
assert output_name in output_category_name_to_id, \
|
|
57
74
|
'No output ID for {} --> {}'.format(input_name,output_name)
|
|
58
75
|
input_id = input_category_name_to_input_category_id[input_name]
|
|
59
76
|
output_id = output_category_name_to_id[output_name]
|
|
60
77
|
input_category_id_to_output_category_id[input_id] = output_id
|
|
78
|
+
|
|
79
|
+
# ...for each category we want to keep
|
|
61
80
|
|
|
81
|
+
printed_unused_category_warnings = set()
|
|
82
|
+
|
|
83
|
+
valid_annotations = []
|
|
84
|
+
|
|
62
85
|
# Map annotations
|
|
63
86
|
for ann in output_data['annotations']:
|
|
64
|
-
assert ann['category_id'] in input_category_id_to_output_category_id, \
|
|
65
|
-
'Unrecognized category ID {}'.format(ann['category_id'])
|
|
66
|
-
ann['category_id'] = input_category_id_to_output_category_id[ann['category_id']]
|
|
67
87
|
|
|
88
|
+
input_category_id = ann['category_id']
|
|
89
|
+
if input_category_id not in input_category_id_to_output_category_id:
|
|
90
|
+
if allow_unused_categories:
|
|
91
|
+
if input_category_id not in printed_unused_category_warnings:
|
|
92
|
+
printed_unused_category_warnings.add(input_category_id)
|
|
93
|
+
input_category_name = \
|
|
94
|
+
input_category_id_to_input_category_name[input_category_id]
|
|
95
|
+
s = 'Skipping unmapped category ID {} ({})'.format(
|
|
96
|
+
input_category_id,input_category_name)
|
|
97
|
+
print(s)
|
|
98
|
+
continue
|
|
99
|
+
else:
|
|
100
|
+
s = 'Unmapped category ID {}'.format(input_category_id)
|
|
101
|
+
raise ValueError(s)
|
|
102
|
+
output_category_id = input_category_id_to_output_category_id[input_category_id]
|
|
103
|
+
ann['category_id'] = output_category_id
|
|
104
|
+
valid_annotations.append(ann)
|
|
105
|
+
|
|
106
|
+
# ...for each annotation
|
|
107
|
+
|
|
108
|
+
# The only reason annotations should get excluded is the case where we allow
|
|
109
|
+
# unused categories
|
|
110
|
+
if not allow_unused_categories:
|
|
111
|
+
assert len(valid_annotations) == len(output_data['annotations'])
|
|
112
|
+
|
|
113
|
+
output_data['annotations'] = valid_annotations
|
|
114
|
+
|
|
68
115
|
# Update the category list
|
|
69
116
|
output_categories = []
|
|
70
117
|
for output_name in output_category_name_to_id:
|
|
@@ -78,6 +125,8 @@ def remap_coco_categories(input_data,
|
|
|
78
125
|
|
|
79
126
|
return input_data
|
|
80
127
|
|
|
128
|
+
# ...def remap_coco_categories(...)
|
|
129
|
+
|
|
81
130
|
|
|
82
131
|
#%% Command-line driver
|
|
83
132
|
|
|
@@ -34,7 +34,7 @@ def _filename_to_image_id(fn):
|
|
|
34
34
|
return fn.replace(' ','_').replace('\\','/')
|
|
35
35
|
|
|
36
36
|
|
|
37
|
-
def _process_image(fn_abs,input_folder,category_id_to_name):
|
|
37
|
+
def _process_image(fn_abs,input_folder,category_id_to_name,label_folder):
|
|
38
38
|
"""
|
|
39
39
|
Internal support function for processing one image's labels.
|
|
40
40
|
"""
|
|
@@ -42,8 +42,8 @@ def _process_image(fn_abs,input_folder,category_id_to_name):
|
|
|
42
42
|
# Create the image object for this image
|
|
43
43
|
#
|
|
44
44
|
# Always use forward slashes in image filenames and IDs
|
|
45
|
-
|
|
46
|
-
image_id = _filename_to_image_id(
|
|
45
|
+
image_fn_relative = os.path.relpath(fn_abs,input_folder).replace('\\','/')
|
|
46
|
+
image_id = _filename_to_image_id(image_fn_relative)
|
|
47
47
|
|
|
48
48
|
# This is done in a separate loop now
|
|
49
49
|
#
|
|
@@ -53,7 +53,7 @@ def _process_image(fn_abs,input_folder,category_id_to_name):
|
|
|
53
53
|
# image_ids.add(image_id)
|
|
54
54
|
|
|
55
55
|
im = {}
|
|
56
|
-
im['file_name'] =
|
|
56
|
+
im['file_name'] = image_fn_relative
|
|
57
57
|
im['id'] = image_id
|
|
58
58
|
|
|
59
59
|
annotations_this_image = []
|
|
@@ -65,14 +65,20 @@ def _process_image(fn_abs,input_folder,category_id_to_name):
|
|
|
65
65
|
im['height'] = im_height
|
|
66
66
|
im['error'] = None
|
|
67
67
|
except Exception as e:
|
|
68
|
-
print('Warning: error reading {}:\n{}'.format(
|
|
68
|
+
print('Warning: error reading {}:\n{}'.format(image_fn_relative,str(e)))
|
|
69
69
|
im['width'] = -1
|
|
70
70
|
im['height'] = -1
|
|
71
71
|
im['error'] = str(e)
|
|
72
72
|
return (im,annotations_this_image)
|
|
73
73
|
|
|
74
74
|
# Is there an annotation file for this image?
|
|
75
|
-
|
|
75
|
+
if label_folder is not None:
|
|
76
|
+
assert input_folder in fn_abs
|
|
77
|
+
label_file_abs_base = fn_abs.replace(input_folder,label_folder)
|
|
78
|
+
else:
|
|
79
|
+
label_file_abs_base = fn_abs
|
|
80
|
+
|
|
81
|
+
annotation_file = os.path.splitext(label_file_abs_base)[0] + '.txt'
|
|
76
82
|
if not os.path.isfile(annotation_file):
|
|
77
83
|
annotation_file = os.path.splitext(fn_abs)[0] + '.TXT'
|
|
78
84
|
|
|
@@ -270,9 +276,14 @@ def validate_label_file(label_file,category_id_to_name=None,verbose=False):
|
|
|
270
276
|
# ...def validate_label_file(...)
|
|
271
277
|
|
|
272
278
|
|
|
273
|
-
def validate_yolo_dataset(input_folder,
|
|
279
|
+
def validate_yolo_dataset(input_folder,
|
|
280
|
+
class_name_file,
|
|
281
|
+
n_workers=1,
|
|
282
|
+
pool_type='thread',
|
|
283
|
+
verbose=False):
|
|
274
284
|
"""
|
|
275
|
-
Verifies all the labels in a YOLO dataset folder.
|
|
285
|
+
Verifies all the labels in a YOLO dataset folder. Does not yet support the case where the
|
|
286
|
+
labels and images are in different folders (yolo_to_coco() supports this).
|
|
276
287
|
|
|
277
288
|
Looks for:
|
|
278
289
|
|
|
@@ -396,14 +407,17 @@ def yolo_to_coco(input_folder,
|
|
|
396
407
|
recursive=True,
|
|
397
408
|
exclude_string=None,
|
|
398
409
|
include_string=None,
|
|
399
|
-
overwrite_handling='overwrite'
|
|
410
|
+
overwrite_handling='overwrite',
|
|
411
|
+
label_folder=None):
|
|
400
412
|
"""
|
|
401
413
|
Converts a YOLO-formatted dataset to a COCO-formatted dataset.
|
|
402
414
|
|
|
403
415
|
All images will be assigned an "error" value, usually None.
|
|
404
416
|
|
|
405
417
|
Args:
|
|
406
|
-
input_folder (str): the YOLO dataset folder to
|
|
418
|
+
input_folder (str): the YOLO dataset folder to convert. If the image and label
|
|
419
|
+
folders are different, this is the image folder, and [label_folder] is the
|
|
420
|
+
label folder.
|
|
407
421
|
class_name_file (str or list): a list of classes, a flat text file, or a yolo
|
|
408
422
|
dataset.yml/.yaml file. If it's a dataset.yml file, that file should point to
|
|
409
423
|
input_folder as the base folder, though this is not explicitly checked.
|
|
@@ -432,6 +446,7 @@ def yolo_to_coco(input_folder,
|
|
|
432
446
|
include_string (str, optional): include only images whose filename contains a string
|
|
433
447
|
overwrite_handling (str, optional): behavior if output_file exists ('load', 'overwrite', or
|
|
434
448
|
'error')
|
|
449
|
+
label_folder (str, optional): label folder, if different from the image folder
|
|
435
450
|
|
|
436
451
|
Returns:
|
|
437
452
|
dict: COCO-formatted data, the same as what's written to [output_file]
|
|
@@ -439,6 +454,8 @@ def yolo_to_coco(input_folder,
|
|
|
439
454
|
|
|
440
455
|
## Validate input
|
|
441
456
|
|
|
457
|
+
input_folder = input_folder.replace('\\','/')
|
|
458
|
+
|
|
442
459
|
assert os.path.isdir(input_folder)
|
|
443
460
|
assert os.path.isfile(class_name_file)
|
|
444
461
|
|
|
@@ -487,6 +504,7 @@ def yolo_to_coco(input_folder,
|
|
|
487
504
|
print('Enumerating images...')
|
|
488
505
|
|
|
489
506
|
image_files_abs = find_images(input_folder,recursive=recursive,convert_slashes=True)
|
|
507
|
+
assert not any(['\\' in fn for fn in image_files_abs])
|
|
490
508
|
|
|
491
509
|
n_files_original = len(image_files_abs)
|
|
492
510
|
|
|
@@ -516,8 +534,14 @@ def yolo_to_coco(input_folder,
|
|
|
516
534
|
|
|
517
535
|
if not allow_images_without_label_files:
|
|
518
536
|
print('Verifying that label files exist')
|
|
537
|
+
# image_file_abs = image_files_abs[0]
|
|
519
538
|
for image_file_abs in tqdm(image_files_abs):
|
|
520
|
-
|
|
539
|
+
if label_folder is not None:
|
|
540
|
+
assert input_folder in image_file_abs
|
|
541
|
+
label_file_abs_base = image_file_abs.replace(input_folder,label_folder)
|
|
542
|
+
else:
|
|
543
|
+
label_file_abs_base = image_file_abs
|
|
544
|
+
label_file_abs = os.path.splitext(label_file_abs_base)[0] + '.txt'
|
|
521
545
|
assert os.path.isfile(label_file_abs), \
|
|
522
546
|
'No annotation file for {}'.format(image_file_abs)
|
|
523
547
|
|
|
@@ -528,7 +552,7 @@ def yolo_to_coco(input_folder,
|
|
|
528
552
|
|
|
529
553
|
for fn_abs in tqdm(image_files_abs):
|
|
530
554
|
|
|
531
|
-
fn_relative = os.path.relpath(fn_abs,input_folder)
|
|
555
|
+
fn_relative = os.path.relpath(fn_abs,input_folder).replace('\\','/')
|
|
532
556
|
image_id = _filename_to_image_id(fn_relative)
|
|
533
557
|
assert image_id not in image_ids, \
|
|
534
558
|
'Oops, you have hit a very esoteric case where you have the same filename ' + \
|
|
@@ -543,8 +567,12 @@ def yolo_to_coco(input_folder,
|
|
|
543
567
|
if n_workers <= 1:
|
|
544
568
|
|
|
545
569
|
image_results = []
|
|
570
|
+
# fn_abs = image_files_abs[0]
|
|
546
571
|
for fn_abs in tqdm(image_files_abs):
|
|
547
|
-
image_results.append(_process_image(fn_abs,
|
|
572
|
+
image_results.append(_process_image(fn_abs,
|
|
573
|
+
input_folder,
|
|
574
|
+
category_id_to_name,
|
|
575
|
+
label_folder))
|
|
548
576
|
|
|
549
577
|
else:
|
|
550
578
|
|
|
@@ -557,8 +585,10 @@ def yolo_to_coco(input_folder,
|
|
|
557
585
|
|
|
558
586
|
print('Starting a {} pool of {} workers'.format(pool_type,n_workers))
|
|
559
587
|
|
|
560
|
-
p = partial(_process_image,
|
|
561
|
-
|
|
588
|
+
p = partial(_process_image,
|
|
589
|
+
input_folder=input_folder,
|
|
590
|
+
category_id_to_name=category_id_to_name,
|
|
591
|
+
label_folder=label_folder)
|
|
562
592
|
image_results = list(tqdm(pool.imap(p, image_files_abs),
|
|
563
593
|
total=len(image_files_abs)))
|
|
564
594
|
|