megadetector 5.0.28__py3-none-any.whl → 10.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of megadetector might be problematic. Click here for more details.
- megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
- megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
- megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
- megadetector/classification/aggregate_classifier_probs.py +3 -3
- megadetector/classification/analyze_failed_images.py +5 -5
- megadetector/classification/cache_batchapi_outputs.py +5 -5
- megadetector/classification/create_classification_dataset.py +11 -12
- megadetector/classification/crop_detections.py +10 -10
- megadetector/classification/csv_to_json.py +8 -8
- megadetector/classification/detect_and_crop.py +13 -15
- megadetector/classification/efficientnet/model.py +8 -8
- megadetector/classification/efficientnet/utils.py +6 -5
- megadetector/classification/evaluate_model.py +7 -7
- megadetector/classification/identify_mislabeled_candidates.py +6 -6
- megadetector/classification/json_to_azcopy_list.py +1 -1
- megadetector/classification/json_validator.py +29 -32
- megadetector/classification/map_classification_categories.py +9 -9
- megadetector/classification/merge_classification_detection_output.py +12 -9
- megadetector/classification/prepare_classification_script.py +19 -19
- megadetector/classification/prepare_classification_script_mc.py +26 -26
- megadetector/classification/run_classifier.py +4 -4
- megadetector/classification/save_mislabeled.py +6 -6
- megadetector/classification/train_classifier.py +1 -1
- megadetector/classification/train_classifier_tf.py +9 -9
- megadetector/classification/train_utils.py +10 -10
- megadetector/data_management/annotations/annotation_constants.py +1 -2
- megadetector/data_management/camtrap_dp_to_coco.py +79 -46
- megadetector/data_management/cct_json_utils.py +103 -103
- megadetector/data_management/cct_to_md.py +49 -49
- megadetector/data_management/cct_to_wi.py +33 -33
- megadetector/data_management/coco_to_labelme.py +75 -75
- megadetector/data_management/coco_to_yolo.py +210 -193
- megadetector/data_management/databases/add_width_and_height_to_db.py +86 -12
- megadetector/data_management/databases/combine_coco_camera_traps_files.py +40 -40
- megadetector/data_management/databases/integrity_check_json_db.py +228 -200
- megadetector/data_management/databases/subset_json_db.py +33 -33
- megadetector/data_management/generate_crops_from_cct.py +88 -39
- megadetector/data_management/get_image_sizes.py +54 -49
- megadetector/data_management/labelme_to_coco.py +133 -125
- megadetector/data_management/labelme_to_yolo.py +159 -73
- megadetector/data_management/lila/create_lila_blank_set.py +81 -83
- megadetector/data_management/lila/create_lila_test_set.py +32 -31
- megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
- megadetector/data_management/lila/download_lila_subset.py +21 -24
- megadetector/data_management/lila/generate_lila_per_image_labels.py +365 -107
- megadetector/data_management/lila/get_lila_annotation_counts.py +35 -33
- megadetector/data_management/lila/get_lila_image_counts.py +22 -22
- megadetector/data_management/lila/lila_common.py +73 -70
- megadetector/data_management/lila/test_lila_metadata_urls.py +28 -19
- megadetector/data_management/mewc_to_md.py +344 -340
- megadetector/data_management/ocr_tools.py +262 -255
- megadetector/data_management/read_exif.py +249 -227
- megadetector/data_management/remap_coco_categories.py +90 -28
- megadetector/data_management/remove_exif.py +81 -21
- megadetector/data_management/rename_images.py +187 -187
- megadetector/data_management/resize_coco_dataset.py +588 -120
- megadetector/data_management/speciesnet_to_md.py +41 -41
- megadetector/data_management/wi_download_csv_to_coco.py +55 -55
- megadetector/data_management/yolo_output_to_md_output.py +248 -122
- megadetector/data_management/yolo_to_coco.py +333 -191
- megadetector/detection/change_detection.py +832 -0
- megadetector/detection/process_video.py +340 -337
- megadetector/detection/pytorch_detector.py +358 -278
- megadetector/detection/run_detector.py +399 -186
- megadetector/detection/run_detector_batch.py +404 -377
- megadetector/detection/run_inference_with_yolov5_val.py +340 -327
- megadetector/detection/run_tiled_inference.py +257 -249
- megadetector/detection/tf_detector.py +24 -24
- megadetector/detection/video_utils.py +332 -295
- megadetector/postprocessing/add_max_conf.py +19 -11
- megadetector/postprocessing/categorize_detections_by_size.py +45 -45
- megadetector/postprocessing/classification_postprocessing.py +468 -433
- megadetector/postprocessing/combine_batch_outputs.py +23 -23
- megadetector/postprocessing/compare_batch_results.py +590 -525
- megadetector/postprocessing/convert_output_format.py +106 -102
- megadetector/postprocessing/create_crop_folder.py +347 -147
- megadetector/postprocessing/detector_calibration.py +173 -168
- megadetector/postprocessing/generate_csv_report.py +508 -499
- megadetector/postprocessing/load_api_results.py +48 -27
- megadetector/postprocessing/md_to_coco.py +133 -102
- megadetector/postprocessing/md_to_labelme.py +107 -90
- megadetector/postprocessing/md_to_wi.py +40 -40
- megadetector/postprocessing/merge_detections.py +92 -114
- megadetector/postprocessing/postprocess_batch_results.py +319 -301
- megadetector/postprocessing/remap_detection_categories.py +91 -38
- megadetector/postprocessing/render_detection_confusion_matrix.py +214 -205
- megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
- megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +704 -679
- megadetector/postprocessing/separate_detections_into_folders.py +226 -211
- megadetector/postprocessing/subset_json_detector_output.py +265 -262
- megadetector/postprocessing/top_folders_to_bottom.py +45 -45
- megadetector/postprocessing/validate_batch_results.py +70 -70
- megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +18 -19
- megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +54 -33
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +67 -67
- megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
- megadetector/taxonomy_mapping/simple_image_download.py +8 -8
- megadetector/taxonomy_mapping/species_lookup.py +156 -74
- megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
- megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
- megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
- megadetector/utils/ct_utils.py +1049 -211
- megadetector/utils/directory_listing.py +21 -77
- megadetector/utils/gpu_test.py +22 -22
- megadetector/utils/md_tests.py +632 -529
- megadetector/utils/path_utils.py +1520 -431
- megadetector/utils/process_utils.py +41 -41
- megadetector/utils/split_locations_into_train_val.py +62 -62
- megadetector/utils/string_utils.py +148 -27
- megadetector/utils/url_utils.py +489 -176
- megadetector/utils/wi_utils.py +2658 -2526
- megadetector/utils/write_html_image_list.py +137 -137
- megadetector/visualization/plot_utils.py +34 -30
- megadetector/visualization/render_images_with_thumbnails.py +39 -74
- megadetector/visualization/visualization_utils.py +487 -435
- megadetector/visualization/visualize_db.py +232 -198
- megadetector/visualization/visualize_detector_output.py +82 -76
- {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/METADATA +5 -2
- megadetector-10.0.0.dist-info/RECORD +139 -0
- {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/WHEEL +1 -1
- megadetector/api/batch_processing/api_core/__init__.py +0 -0
- megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
- megadetector/api/batch_processing/api_core/batch_service/score.py +0 -439
- megadetector/api/batch_processing/api_core/server.py +0 -294
- megadetector/api/batch_processing/api_core/server_api_config.py +0 -97
- megadetector/api/batch_processing/api_core/server_app_config.py +0 -55
- megadetector/api/batch_processing/api_core/server_batch_job_manager.py +0 -220
- megadetector/api/batch_processing/api_core/server_job_status_table.py +0 -149
- megadetector/api/batch_processing/api_core/server_orchestration.py +0 -360
- megadetector/api/batch_processing/api_core/server_utils.py +0 -88
- megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
- megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
- megadetector/api/batch_processing/api_support/__init__.py +0 -0
- megadetector/api/batch_processing/api_support/summarize_daily_activity.py +0 -152
- megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
- megadetector/api/synchronous/__init__.py +0 -0
- megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
- megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +0 -151
- megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -263
- megadetector/api/synchronous/api_core/animal_detection_api/config.py +0 -35
- megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
- megadetector/api/synchronous/api_core/tests/load_test.py +0 -110
- megadetector/data_management/importers/add_nacti_sizes.py +0 -52
- megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
- megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
- megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
- megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
- megadetector/data_management/importers/awc_to_json.py +0 -191
- megadetector/data_management/importers/bellevue_to_json.py +0 -272
- megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
- megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
- megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
- megadetector/data_management/importers/cct_field_adjustments.py +0 -58
- megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
- megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
- megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
- megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
- megadetector/data_management/importers/ena24_to_json.py +0 -276
- megadetector/data_management/importers/filenames_to_json.py +0 -386
- megadetector/data_management/importers/helena_to_cct.py +0 -283
- megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
- megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
- megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
- megadetector/data_management/importers/jb_csv_to_json.py +0 -150
- megadetector/data_management/importers/mcgill_to_json.py +0 -250
- megadetector/data_management/importers/missouri_to_json.py +0 -490
- megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
- megadetector/data_management/importers/noaa_seals_2019.py +0 -181
- megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
- megadetector/data_management/importers/pc_to_json.py +0 -365
- megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
- megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
- megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
- megadetector/data_management/importers/rspb_to_json.py +0 -356
- megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
- megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
- megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
- megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
- megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
- megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
- megadetector/data_management/importers/sulross_get_exif.py +0 -65
- megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
- megadetector/data_management/importers/ubc_to_json.py +0 -399
- megadetector/data_management/importers/umn_to_json.py +0 -507
- megadetector/data_management/importers/wellington_to_json.py +0 -263
- megadetector/data_management/importers/wi_to_json.py +0 -442
- megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
- megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
- megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
- megadetector/utils/azure_utils.py +0 -178
- megadetector/utils/sas_blob_utils.py +0 -509
- megadetector-5.0.28.dist-info/RECORD +0 -209
- /megadetector/{api/batch_processing/__init__.py → __init__.py} +0 -0
- {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/licenses/LICENSE +0 -0
- {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/top_level.txt +0 -0
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
yolo_to_coco.py
|
|
4
4
|
|
|
5
|
-
Converts a folder of YOLO-formatted annotation files to a COCO-formatted dataset.
|
|
5
|
+
Converts a folder of YOLO-formatted annotation files to a COCO-formatted dataset.
|
|
6
6
|
|
|
7
7
|
"""
|
|
8
8
|
|
|
@@ -10,6 +10,8 @@ Converts a folder of YOLO-formatted annotation files to a COCO-formatted dataset
|
|
|
10
10
|
|
|
11
11
|
import json
|
|
12
12
|
import os
|
|
13
|
+
import argparse
|
|
14
|
+
import sys
|
|
13
15
|
|
|
14
16
|
from multiprocessing.pool import ThreadPool
|
|
15
17
|
from multiprocessing.pool import Pool
|
|
@@ -22,7 +24,8 @@ from megadetector.utils.path_utils import recursive_file_list
|
|
|
22
24
|
from megadetector.utils.path_utils import find_image_strings
|
|
23
25
|
from megadetector.utils.ct_utils import invert_dictionary
|
|
24
26
|
from megadetector.visualization.visualization_utils import open_image
|
|
25
|
-
from megadetector.data_management.yolo_output_to_md_output import
|
|
27
|
+
from megadetector.data_management.yolo_output_to_md_output import \
|
|
28
|
+
read_classes_from_yolo_dataset_file
|
|
26
29
|
|
|
27
30
|
|
|
28
31
|
#%% Support functions
|
|
@@ -31,6 +34,7 @@ def _filename_to_image_id(fn):
|
|
|
31
34
|
"""
|
|
32
35
|
Image IDs can't have spaces in them, replace spaces with underscores
|
|
33
36
|
"""
|
|
37
|
+
|
|
34
38
|
return fn.replace(' ','_').replace('\\','/')
|
|
35
39
|
|
|
36
40
|
|
|
@@ -38,27 +42,27 @@ def _process_image(fn_abs,input_folder,category_id_to_name,label_folder):
|
|
|
38
42
|
"""
|
|
39
43
|
Internal support function for processing one image's labels.
|
|
40
44
|
"""
|
|
41
|
-
|
|
45
|
+
|
|
42
46
|
# Create the image object for this image
|
|
43
47
|
#
|
|
44
48
|
# Always use forward slashes in image filenames and IDs
|
|
45
49
|
image_fn_relative = os.path.relpath(fn_abs,input_folder).replace('\\','/')
|
|
46
50
|
image_id = _filename_to_image_id(image_fn_relative)
|
|
47
|
-
|
|
51
|
+
|
|
48
52
|
# This is done in a separate loop now
|
|
49
53
|
#
|
|
50
54
|
# assert image_id not in image_ids, \
|
|
51
55
|
# 'Oops, you have hit a very esoteric case where you have the same filename ' + \
|
|
52
56
|
# 'with both spaces and underscores, this is not currently handled.'
|
|
53
57
|
# image_ids.add(image_id)
|
|
54
|
-
|
|
58
|
+
|
|
55
59
|
im = {}
|
|
56
60
|
im['file_name'] = image_fn_relative
|
|
57
61
|
im['id'] = image_id
|
|
58
|
-
|
|
62
|
+
|
|
59
63
|
annotations_this_image = []
|
|
60
|
-
|
|
61
|
-
try:
|
|
64
|
+
|
|
65
|
+
try:
|
|
62
66
|
pil_im = open_image(fn_abs)
|
|
63
67
|
im_width, im_height = pil_im.size
|
|
64
68
|
im['width'] = im_width
|
|
@@ -70,32 +74,32 @@ def _process_image(fn_abs,input_folder,category_id_to_name,label_folder):
|
|
|
70
74
|
im['height'] = -1
|
|
71
75
|
im['error'] = str(e)
|
|
72
76
|
return (im,annotations_this_image)
|
|
73
|
-
|
|
77
|
+
|
|
74
78
|
# Is there an annotation file for this image?
|
|
75
79
|
if label_folder is not None:
|
|
76
80
|
assert input_folder in fn_abs
|
|
77
81
|
label_file_abs_base = fn_abs.replace(input_folder,label_folder)
|
|
78
82
|
else:
|
|
79
83
|
label_file_abs_base = fn_abs
|
|
80
|
-
|
|
84
|
+
|
|
81
85
|
annotation_file = os.path.splitext(label_file_abs_base)[0] + '.txt'
|
|
82
86
|
if not os.path.isfile(annotation_file):
|
|
83
87
|
annotation_file = os.path.splitext(fn_abs)[0] + '.TXT'
|
|
84
|
-
|
|
88
|
+
|
|
85
89
|
if os.path.isfile(annotation_file):
|
|
86
|
-
|
|
90
|
+
|
|
87
91
|
with open(annotation_file,'r') as f:
|
|
88
92
|
lines = f.readlines()
|
|
89
93
|
lines = [s.strip() for s in lines]
|
|
90
|
-
|
|
94
|
+
|
|
91
95
|
# s = lines[0]
|
|
92
96
|
annotation_number = 0
|
|
93
|
-
|
|
97
|
+
|
|
94
98
|
for s in lines:
|
|
95
|
-
|
|
99
|
+
|
|
96
100
|
if len(s.strip()) == 0:
|
|
97
101
|
continue
|
|
98
|
-
|
|
102
|
+
|
|
99
103
|
tokens = s.split()
|
|
100
104
|
assert len(tokens) == 5
|
|
101
105
|
category_id = int(tokens[0])
|
|
@@ -107,35 +111,35 @@ def _process_image(fn_abs,input_folder,category_id_to_name,label_folder):
|
|
|
107
111
|
ann['image_id'] = im['id']
|
|
108
112
|
ann['category_id'] = category_id
|
|
109
113
|
ann['sequence_level_annotation'] = False
|
|
110
|
-
|
|
114
|
+
|
|
111
115
|
# COCO: [x_min, y_min, width, height] in absolute coordinates
|
|
112
116
|
# YOLO: [class, x_center, y_center, width, height] in normalized coordinates
|
|
113
|
-
|
|
117
|
+
|
|
114
118
|
yolo_bbox = [float(x) for x in tokens[1:]]
|
|
115
|
-
|
|
119
|
+
|
|
116
120
|
normalized_x_center = yolo_bbox[0]
|
|
117
121
|
normalized_y_center = yolo_bbox[1]
|
|
118
122
|
normalized_width = yolo_bbox[2]
|
|
119
123
|
normalized_height = yolo_bbox[3]
|
|
120
|
-
|
|
121
|
-
absolute_x_center = normalized_x_center * im_width
|
|
124
|
+
|
|
125
|
+
absolute_x_center = normalized_x_center * im_width
|
|
122
126
|
absolute_y_center = normalized_y_center * im_height
|
|
123
127
|
absolute_width = normalized_width * im_width
|
|
124
128
|
absolute_height = normalized_height * im_height
|
|
125
129
|
absolute_x_min = absolute_x_center - absolute_width / 2
|
|
126
130
|
absolute_y_min = absolute_y_center - absolute_height / 2
|
|
127
|
-
|
|
131
|
+
|
|
128
132
|
coco_bbox = [absolute_x_min, absolute_y_min, absolute_width, absolute_height]
|
|
129
|
-
|
|
133
|
+
|
|
130
134
|
ann['bbox'] = coco_bbox
|
|
131
135
|
annotation_number += 1
|
|
132
|
-
|
|
133
|
-
annotations_this_image.append(ann)
|
|
134
|
-
|
|
135
|
-
# ...for each annotation
|
|
136
|
-
|
|
136
|
+
|
|
137
|
+
annotations_this_image.append(ann)
|
|
138
|
+
|
|
139
|
+
# ...for each annotation
|
|
140
|
+
|
|
137
141
|
# ...if this image has annotations
|
|
138
|
-
|
|
142
|
+
|
|
139
143
|
return (im,annotations_this_image)
|
|
140
144
|
|
|
141
145
|
# ...def _process_image(...)
|
|
@@ -144,37 +148,37 @@ def _process_image(fn_abs,input_folder,category_id_to_name,label_folder):
|
|
|
144
148
|
def load_yolo_class_list(class_name_file):
|
|
145
149
|
"""
|
|
146
150
|
Loads a dictionary mapping zero-indexed IDs to class names from the text/yaml file
|
|
147
|
-
[class_name_file].
|
|
148
|
-
|
|
151
|
+
[class_name_file].
|
|
152
|
+
|
|
149
153
|
Args:
|
|
150
154
|
class_name_file (str or list): this can be:
|
|
151
155
|
- a .yaml or .yaml file in YOLO's dataset.yaml format
|
|
152
156
|
- a .txt or .data file containing a flat list of class names
|
|
153
157
|
- a list of class names
|
|
154
|
-
|
|
158
|
+
|
|
155
159
|
Returns:
|
|
156
160
|
dict: A dict mapping zero-indexed integer IDs to class names
|
|
157
161
|
"""
|
|
158
|
-
|
|
162
|
+
|
|
159
163
|
# class_name_file can also be a list of class names
|
|
160
164
|
if isinstance(class_name_file,list):
|
|
161
165
|
category_id_to_name = {}
|
|
162
166
|
for i_name,name in enumerate(class_name_file):
|
|
163
167
|
category_id_to_name[i_name] = name
|
|
164
168
|
return category_id_to_name
|
|
165
|
-
|
|
169
|
+
|
|
166
170
|
ext = os.path.splitext(class_name_file)[1][1:]
|
|
167
171
|
assert ext in ('yml','txt','yaml','data'), 'Unrecognized class name file type {}'.format(
|
|
168
172
|
class_name_file)
|
|
169
|
-
|
|
173
|
+
|
|
170
174
|
if ext in ('txt','data'):
|
|
171
|
-
|
|
175
|
+
|
|
172
176
|
with open(class_name_file,'r') as f:
|
|
173
177
|
lines = f.readlines()
|
|
174
178
|
assert len(lines) > 0, 'Empty class name file {}'.format(class_name_file)
|
|
175
179
|
class_names = [s.strip() for s in lines]
|
|
176
180
|
assert len(lines[0]) > 0, 'Empty class name file {} (empty first line)'.format(class_name_file)
|
|
177
|
-
|
|
181
|
+
|
|
178
182
|
# Blank lines should only appear at the end
|
|
179
183
|
b_found_blank = False
|
|
180
184
|
for s in lines:
|
|
@@ -183,17 +187,17 @@ def load_yolo_class_list(class_name_file):
|
|
|
183
187
|
elif b_found_blank:
|
|
184
188
|
raise ValueError('Invalid class name file {}, non-blank line after the last blank line'.format(
|
|
185
189
|
class_name_file))
|
|
186
|
-
|
|
187
|
-
category_id_to_name = {}
|
|
190
|
+
|
|
191
|
+
category_id_to_name = {}
|
|
188
192
|
for i_category_id,category_name in enumerate(class_names):
|
|
189
193
|
assert len(category_name) > 0
|
|
190
194
|
category_id_to_name[i_category_id] = category_name
|
|
191
|
-
|
|
195
|
+
|
|
192
196
|
else:
|
|
193
|
-
|
|
197
|
+
|
|
194
198
|
assert ext in ('yml','yaml')
|
|
195
199
|
category_id_to_name = read_classes_from_yolo_dataset_file(class_name_file)
|
|
196
|
-
|
|
200
|
+
|
|
197
201
|
return category_id_to_name
|
|
198
202
|
|
|
199
203
|
# ...load_yolo_class_list(...)
|
|
@@ -202,91 +206,91 @@ def load_yolo_class_list(class_name_file):
|
|
|
202
206
|
def validate_label_file(label_file,category_id_to_name=None,verbose=False):
|
|
203
207
|
""""
|
|
204
208
|
Verifies that [label_file] is a valid YOLO label file. Does not check the extension.
|
|
205
|
-
|
|
209
|
+
|
|
206
210
|
Args:
|
|
207
211
|
label_file (str): the .txt file to validate
|
|
208
212
|
category_id_to_name (dict, optional): a dict mapping integer category IDs to names;
|
|
209
213
|
if this is not None, this function errors if the file uses a category that's not
|
|
210
214
|
in this dict
|
|
211
215
|
verbose (bool, optional): enable additional debug console output
|
|
212
|
-
|
|
216
|
+
|
|
213
217
|
Returns:
|
|
214
|
-
dict: a dict with keys 'file' (the same as [label_file]) and 'errors' (a list of
|
|
218
|
+
dict: a dict with keys 'file' (the same as [label_file]) and 'errors' (a list of
|
|
215
219
|
errors (if any) that we found in this file)
|
|
216
220
|
"""
|
|
217
|
-
|
|
221
|
+
|
|
218
222
|
label_result = {}
|
|
219
223
|
label_result['file'] = label_file
|
|
220
224
|
label_result['errors'] = []
|
|
221
|
-
|
|
225
|
+
|
|
222
226
|
try:
|
|
223
227
|
with open(label_file,'r') as f:
|
|
224
228
|
lines = f.readlines()
|
|
225
229
|
except Exception as e:
|
|
226
230
|
label_result['errors'].append('Read error: {}'.format(str(e)))
|
|
227
231
|
return label_result
|
|
228
|
-
|
|
232
|
+
|
|
229
233
|
# i_line 0; line = lines[i_line]
|
|
230
234
|
for i_line,line in enumerate(lines):
|
|
231
235
|
s = line.strip()
|
|
232
236
|
if len(s) == 0 or s[0] == '#':
|
|
233
237
|
continue
|
|
234
|
-
|
|
238
|
+
|
|
235
239
|
try:
|
|
236
|
-
|
|
240
|
+
|
|
237
241
|
tokens = s.split()
|
|
238
|
-
assert len(tokens) == 5, '{} tokens'.format(len(tokens))
|
|
239
|
-
|
|
242
|
+
assert len(tokens) == 5, '{} tokens'.format(len(tokens))
|
|
243
|
+
|
|
240
244
|
if category_id_to_name is not None:
|
|
241
245
|
category_id = int(tokens[0])
|
|
242
246
|
assert category_id in category_id_to_name, \
|
|
243
247
|
'Unrecognized category ID {}'.format(category_id)
|
|
244
|
-
|
|
248
|
+
|
|
245
249
|
yolo_bbox = [float(x) for x in tokens[1:]]
|
|
246
|
-
|
|
250
|
+
|
|
247
251
|
except Exception as e:
|
|
248
252
|
label_result['errors'].append('Token error at line {}: {}'.format(i_line,str(e)))
|
|
249
253
|
continue
|
|
250
|
-
|
|
254
|
+
|
|
251
255
|
normalized_x_center = yolo_bbox[0]
|
|
252
256
|
normalized_y_center = yolo_bbox[1]
|
|
253
257
|
normalized_width = yolo_bbox[2]
|
|
254
258
|
normalized_height = yolo_bbox[3]
|
|
255
|
-
|
|
259
|
+
|
|
256
260
|
normalized_x_min = normalized_x_center - normalized_width / 2.0
|
|
257
261
|
normalized_x_max = normalized_x_center + normalized_width / 2.0
|
|
258
262
|
normalized_y_min = normalized_y_center - normalized_height / 2.0
|
|
259
263
|
normalized_y_max = normalized_y_center + normalized_height / 2.0
|
|
260
|
-
|
|
264
|
+
|
|
261
265
|
if normalized_x_min < 0 or normalized_y_min < 0 or \
|
|
262
266
|
normalized_x_max > 1 or normalized_y_max > 1:
|
|
263
267
|
label_result['errors'].append('Invalid bounding box: {} {} {} {}'.format(
|
|
264
268
|
normalized_x_min,normalized_y_min,normalized_x_max,normalized_y_max))
|
|
265
|
-
|
|
269
|
+
|
|
266
270
|
# ...for each line
|
|
267
|
-
|
|
271
|
+
|
|
268
272
|
if verbose:
|
|
269
273
|
if len(label_result['errors']) > 0:
|
|
270
274
|
print('Errors for {}:'.format(label_file))
|
|
271
275
|
for error in label_result['errors']:
|
|
272
276
|
print(error)
|
|
273
|
-
|
|
277
|
+
|
|
274
278
|
return label_result
|
|
275
|
-
|
|
279
|
+
|
|
276
280
|
# ...def validate_label_file(...)
|
|
277
281
|
|
|
278
|
-
|
|
279
|
-
def validate_yolo_dataset(input_folder,
|
|
280
|
-
class_name_file,
|
|
281
|
-
n_workers=1,
|
|
282
|
-
pool_type='thread',
|
|
282
|
+
|
|
283
|
+
def validate_yolo_dataset(input_folder,
|
|
284
|
+
class_name_file,
|
|
285
|
+
n_workers=1,
|
|
286
|
+
pool_type='thread',
|
|
283
287
|
verbose=False):
|
|
284
288
|
"""
|
|
285
|
-
Verifies all the labels in a YOLO dataset folder. Does not yet support the case where the
|
|
289
|
+
Verifies all the labels in a YOLO dataset folder. Does not yet support the case where the
|
|
286
290
|
labels and images are in different folders (yolo_to_coco() supports this).
|
|
287
|
-
|
|
291
|
+
|
|
288
292
|
Looks for:
|
|
289
|
-
|
|
293
|
+
|
|
290
294
|
* Image files without label files
|
|
291
295
|
* Text files without image files
|
|
292
296
|
* Illegal classes in label files
|
|
@@ -294,103 +298,109 @@ def validate_yolo_dataset(input_folder,
|
|
|
294
298
|
|
|
295
299
|
Args:
|
|
296
300
|
input_folder (str): the YOLO dataset folder to validate
|
|
297
|
-
class_name_file (str or list): a list of classes, a flat text file, or a yolo
|
|
298
|
-
dataset.yml/.yaml file. If it's a dataset.yml file, that file should point to
|
|
301
|
+
class_name_file (str or list): a list of classes, a flat text file, or a yolo
|
|
302
|
+
dataset.yml/.yaml file. If it's a dataset.yml file, that file should point to
|
|
299
303
|
input_folder as the base folder, though this is not explicitly checked.
|
|
300
304
|
n_workers (int, optional): number of concurrent workers, set to <= 1 to disable
|
|
301
305
|
parallelization
|
|
302
306
|
pool_type (str, optional): 'thread' or 'process', worker type to use for parallelization;
|
|
303
307
|
not used if [n_workers] <= 1
|
|
304
308
|
verbose (bool, optional): enable additional debug console output
|
|
305
|
-
|
|
309
|
+
|
|
306
310
|
Returns:
|
|
307
|
-
dict: validation results, as a dict with fields:
|
|
308
|
-
|
|
311
|
+
dict: validation results, as a dict with fields:
|
|
312
|
+
|
|
309
313
|
- image_files_without_label_files (list)
|
|
310
314
|
- label_files_without_image_files (list)
|
|
311
315
|
- label_results (list of dicts with field 'filename', 'errors') (list)
|
|
312
316
|
"""
|
|
313
|
-
|
|
317
|
+
|
|
314
318
|
# Validate arguments
|
|
315
319
|
assert os.path.isdir(input_folder), 'Could not find input folder {}'.format(input_folder)
|
|
316
320
|
if n_workers > 1:
|
|
317
321
|
assert pool_type in ('thread','process'), 'Illegal pool type {}'.format(pool_type)
|
|
318
|
-
|
|
322
|
+
|
|
319
323
|
category_id_to_name = load_yolo_class_list(class_name_file)
|
|
320
|
-
|
|
324
|
+
|
|
321
325
|
print('Enumerating files in {}'.format(input_folder))
|
|
322
|
-
|
|
326
|
+
|
|
323
327
|
all_files = recursive_file_list(input_folder,recursive=True,return_relative_paths=False,
|
|
324
328
|
convert_slashes=True)
|
|
325
329
|
label_files = [fn for fn in all_files if fn.endswith('.txt')]
|
|
326
330
|
image_files = find_image_strings(all_files)
|
|
327
331
|
print('Found {} images files and {} label files in {}'.format(
|
|
328
332
|
len(image_files),len(label_files),input_folder))
|
|
329
|
-
|
|
333
|
+
|
|
330
334
|
label_files_set = set(label_files)
|
|
331
|
-
|
|
335
|
+
|
|
332
336
|
image_files_without_extension = set()
|
|
333
337
|
for fn in image_files:
|
|
334
338
|
image_file_without_extension = os.path.splitext(fn)[0]
|
|
335
339
|
assert image_file_without_extension not in image_files_without_extension, \
|
|
336
340
|
'Duplicate image file, likely with different extensions: {}'.format(fn)
|
|
337
341
|
image_files_without_extension.add(image_file_without_extension)
|
|
338
|
-
|
|
342
|
+
|
|
339
343
|
print('Looking for missing image/label files')
|
|
340
|
-
|
|
344
|
+
|
|
341
345
|
image_files_without_label_files = []
|
|
342
346
|
label_files_without_images = []
|
|
343
|
-
|
|
347
|
+
|
|
344
348
|
for image_file in tqdm(image_files):
|
|
345
349
|
expected_label_file = os.path.splitext(image_file)[0] + '.txt'
|
|
346
350
|
if expected_label_file not in label_files_set:
|
|
347
351
|
image_files_without_label_files.append(image_file)
|
|
348
|
-
|
|
352
|
+
|
|
349
353
|
for label_file in tqdm(label_files):
|
|
350
354
|
expected_image_file_without_extension = os.path.splitext(label_file)[0]
|
|
351
355
|
if expected_image_file_without_extension not in image_files_without_extension:
|
|
352
356
|
label_files_without_images.append(label_file)
|
|
353
|
-
|
|
357
|
+
|
|
354
358
|
print('Found {} image files without labels, {} labels without images'.format(
|
|
355
359
|
len(image_files_without_label_files),len(label_files_without_images)))
|
|
356
360
|
|
|
357
361
|
print('Validating label files')
|
|
358
|
-
|
|
362
|
+
|
|
359
363
|
if n_workers <= 1:
|
|
360
|
-
|
|
361
|
-
label_results = []
|
|
362
|
-
for fn_abs in tqdm(label_files):
|
|
364
|
+
|
|
365
|
+
label_results = []
|
|
366
|
+
for fn_abs in tqdm(label_files):
|
|
363
367
|
label_results.append(validate_label_file(fn_abs,
|
|
364
368
|
category_id_to_name=category_id_to_name,
|
|
365
369
|
verbose=verbose))
|
|
366
|
-
|
|
370
|
+
|
|
367
371
|
else:
|
|
368
|
-
|
|
372
|
+
|
|
369
373
|
assert pool_type in ('process','thread'), 'Illegal pool type {}'.format(pool_type)
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
374
|
+
|
|
375
|
+
pool = None
|
|
376
|
+
try:
|
|
377
|
+
if pool_type == 'thread':
|
|
378
|
+
pool = ThreadPool(n_workers)
|
|
379
|
+
else:
|
|
380
|
+
pool = Pool(n_workers)
|
|
381
|
+
|
|
382
|
+
print('Starting a {} pool of {} workers'.format(pool_type,n_workers))
|
|
383
|
+
|
|
384
|
+
p = partial(validate_label_file,
|
|
385
|
+
category_id_to_name=category_id_to_name,
|
|
386
|
+
verbose=verbose)
|
|
387
|
+
label_results = list(tqdm(pool.imap(p, label_files),
|
|
388
|
+
total=len(label_files)))
|
|
389
|
+
finally:
|
|
390
|
+
pool.close()
|
|
391
|
+
pool.join()
|
|
392
|
+
print("Pool closed and joined for label file validation")
|
|
393
|
+
|
|
384
394
|
assert len(label_results) == len(label_files)
|
|
385
|
-
|
|
395
|
+
|
|
386
396
|
validation_results = {}
|
|
387
397
|
validation_results['image_files_without_label_files'] = image_files_without_label_files
|
|
388
398
|
validation_results['label_files_without_images'] = label_files_without_images
|
|
389
399
|
validation_results['label_results'] = label_results
|
|
390
|
-
|
|
400
|
+
|
|
391
401
|
return validation_results
|
|
392
|
-
|
|
393
|
-
# ...validate_yolo_dataset(...)
|
|
402
|
+
|
|
403
|
+
# ...validate_yolo_dataset(...)
|
|
394
404
|
|
|
395
405
|
|
|
396
406
|
#%% Main conversion function
|
|
@@ -411,32 +421,35 @@ def yolo_to_coco(input_folder,
|
|
|
411
421
|
label_folder=None):
|
|
412
422
|
"""
|
|
413
423
|
Converts a YOLO-formatted dataset to a COCO-formatted dataset.
|
|
414
|
-
|
|
415
|
-
All images will be assigned an "error" value, usually None.
|
|
416
|
-
|
|
424
|
+
|
|
425
|
+
All images will be assigned an "error" value, usually None.
|
|
426
|
+
|
|
417
427
|
Args:
|
|
418
|
-
input_folder (str): the YOLO dataset folder to convert. If the image and label
|
|
428
|
+
input_folder (str): the YOLO dataset folder to convert. If the image and label
|
|
419
429
|
folders are different, this is the image folder, and [label_folder] is the
|
|
420
430
|
label folder.
|
|
421
|
-
class_name_file (str or list): a list of classes, a flat text file, or a yolo
|
|
422
|
-
dataset.yml/.yaml file. If it's a dataset.yml file, that file should point to
|
|
431
|
+
class_name_file (str or list): a list of classes, a flat text file, or a yolo
|
|
432
|
+
dataset.yml/.yaml file. If it's a dataset.yml file, that file should point to
|
|
423
433
|
input_folder as the base folder, though this is not explicitly checked.
|
|
424
434
|
output_file (str, optional): .json file to which we should write COCO .json data
|
|
425
435
|
empty_image_handling (str, optional): how to handle images with no boxes; whether
|
|
426
|
-
this includes images with no .txt files
|
|
436
|
+
this includes images with no .txt files depends on the value of
|
|
427
437
|
[allow_images_without_label_files]. Can be:
|
|
428
|
-
|
|
438
|
+
|
|
429
439
|
- 'no_annotations': include the image in the image list, with no annotations
|
|
430
440
|
- 'empty_annotations': include the image in the image list, and add an annotation without
|
|
431
441
|
any bounding boxes, using a category called [empty_image_category_name].
|
|
432
442
|
- 'skip': don't include the image in the image list
|
|
433
|
-
- 'error': there shouldn't be any empty images
|
|
443
|
+
- 'error': there shouldn't be any empty images
|
|
444
|
+
empty_image_category_name (str, optional): if we're going to be inserting annotations for
|
|
445
|
+
images with no boxes, what category name should we use?
|
|
434
446
|
error_image_handling (str, optional): how to handle images that don't load properly; can
|
|
435
447
|
be:
|
|
436
|
-
|
|
448
|
+
|
|
437
449
|
- 'skip': don't include the image at all
|
|
438
450
|
- 'no_annotations': include with no annotations
|
|
439
|
-
|
|
451
|
+
allow_images_without_label_files (bool, optional): whether to silently allow images with
|
|
452
|
+
no label files (True) or raise errors for images with no label files (False)
|
|
440
453
|
n_workers (int, optional): number of concurrent workers, set to <= 1 to disable
|
|
441
454
|
parallelization
|
|
442
455
|
pool_type (str, optional): 'thread' or 'process', worker type to use for parallelization;
|
|
@@ -444,27 +457,27 @@ def yolo_to_coco(input_folder,
|
|
|
444
457
|
recursive (bool, optional): whether to recurse into [input_folder]
|
|
445
458
|
exclude_string (str, optional): exclude any images whose filename contains a string
|
|
446
459
|
include_string (str, optional): include only images whose filename contains a string
|
|
447
|
-
overwrite_handling (bool, optional): behavior if output_file exists ('load', 'overwrite', or
|
|
460
|
+
overwrite_handling (str, optional): behavior if output_file exists ('load', 'overwrite', or
|
|
448
461
|
'error')
|
|
449
462
|
label_folder (str, optional): label folder, if different from the image folder
|
|
450
|
-
|
|
463
|
+
|
|
451
464
|
Returns:
|
|
452
465
|
dict: COCO-formatted data, the same as what's written to [output_file]
|
|
453
466
|
"""
|
|
454
|
-
|
|
467
|
+
|
|
455
468
|
## Validate input
|
|
456
|
-
|
|
469
|
+
|
|
457
470
|
input_folder = input_folder.replace('\\','/')
|
|
458
|
-
|
|
471
|
+
|
|
459
472
|
assert os.path.isdir(input_folder)
|
|
460
473
|
assert os.path.isfile(class_name_file)
|
|
461
|
-
|
|
474
|
+
|
|
462
475
|
assert empty_image_handling in \
|
|
463
476
|
('no_annotations','empty_annotations','skip','error'), \
|
|
464
477
|
'Unrecognized empty image handling spec: {}'.format(empty_image_handling)
|
|
465
|
-
|
|
478
|
+
|
|
466
479
|
if (output_file is not None) and os.path.isfile(output_file):
|
|
467
|
-
|
|
480
|
+
|
|
468
481
|
if overwrite_handling == 'overwrite':
|
|
469
482
|
print('Warning: output file {} exists, over-writing'.format(output_file))
|
|
470
483
|
elif overwrite_handling == 'load':
|
|
@@ -476,62 +489,62 @@ def yolo_to_coco(input_folder,
|
|
|
476
489
|
raise ValueError('Output file {} exists'.format(output_file))
|
|
477
490
|
else:
|
|
478
491
|
raise ValueError('Unrecognized overwrite_handling value: {}'.format(overwrite_handling))
|
|
479
|
-
|
|
480
|
-
|
|
492
|
+
|
|
493
|
+
|
|
481
494
|
## Read class names
|
|
482
|
-
|
|
495
|
+
|
|
483
496
|
category_id_to_name = load_yolo_class_list(class_name_file)
|
|
484
|
-
|
|
485
|
-
|
|
497
|
+
|
|
498
|
+
|
|
486
499
|
# Find or create the empty image category, if necessary
|
|
487
500
|
empty_category_id = None
|
|
488
|
-
|
|
489
|
-
if
|
|
501
|
+
|
|
502
|
+
if empty_image_handling == 'empty_annotations':
|
|
490
503
|
category_name_to_id = invert_dictionary(category_id_to_name)
|
|
491
504
|
if empty_image_category_name in category_name_to_id:
|
|
492
505
|
empty_category_id = category_name_to_id[empty_image_category_name]
|
|
493
506
|
print('Using existing empty image category with name {}, ID {}'.format(
|
|
494
|
-
empty_image_category_name,empty_category_id))
|
|
507
|
+
empty_image_category_name,empty_category_id))
|
|
495
508
|
else:
|
|
496
509
|
empty_category_id = len(category_id_to_name)
|
|
497
510
|
print('Adding an empty category with name {}, ID {}'.format(
|
|
498
511
|
empty_image_category_name,empty_category_id))
|
|
499
512
|
category_id_to_name[empty_category_id] = empty_image_category_name
|
|
500
|
-
|
|
501
|
-
|
|
513
|
+
|
|
514
|
+
|
|
502
515
|
## Enumerate images
|
|
503
|
-
|
|
516
|
+
|
|
504
517
|
print('Enumerating images...')
|
|
505
|
-
|
|
518
|
+
|
|
506
519
|
image_files_abs = find_images(input_folder,recursive=recursive,convert_slashes=True)
|
|
507
520
|
assert not any(['\\' in fn for fn in image_files_abs])
|
|
508
521
|
|
|
509
522
|
n_files_original = len(image_files_abs)
|
|
510
|
-
|
|
523
|
+
|
|
511
524
|
# Optionally include/exclude images matching specific strings
|
|
512
525
|
if exclude_string is not None:
|
|
513
526
|
image_files_abs = [fn for fn in image_files_abs if exclude_string not in fn]
|
|
514
527
|
if include_string is not None:
|
|
515
528
|
image_files_abs = [fn for fn in image_files_abs if include_string in fn]
|
|
516
|
-
|
|
529
|
+
|
|
517
530
|
if len(image_files_abs) != n_files_original or exclude_string is not None or include_string is not None:
|
|
518
531
|
n_excluded = n_files_original - len(image_files_abs)
|
|
519
532
|
print('Excluded {} of {} images based on filenames'.format(n_excluded,n_files_original))
|
|
520
|
-
|
|
533
|
+
|
|
521
534
|
categories = []
|
|
522
|
-
|
|
535
|
+
|
|
523
536
|
for category_id in category_id_to_name:
|
|
524
537
|
categories.append({'id':category_id,'name':category_id_to_name[category_id]})
|
|
525
|
-
|
|
538
|
+
|
|
526
539
|
info = {}
|
|
527
540
|
info['version'] = '1.0'
|
|
528
541
|
info['description'] = 'Converted from YOLO format'
|
|
529
|
-
|
|
542
|
+
|
|
530
543
|
image_ids = set()
|
|
531
|
-
|
|
532
|
-
|
|
544
|
+
|
|
545
|
+
|
|
533
546
|
## If we're expected to have labels for every image, check before we process all the images
|
|
534
|
-
|
|
547
|
+
|
|
535
548
|
if not allow_images_without_label_files:
|
|
536
549
|
print('Verifying that label files exist')
|
|
537
550
|
# image_file_abs = image_files_abs[0]
|
|
@@ -544,88 +557,88 @@ def yolo_to_coco(input_folder,
|
|
|
544
557
|
label_file_abs = os.path.splitext(label_file_abs_base)[0] + '.txt'
|
|
545
558
|
assert os.path.isfile(label_file_abs), \
|
|
546
559
|
'No annotation file for {}'.format(image_file_abs)
|
|
547
|
-
|
|
548
|
-
|
|
560
|
+
|
|
561
|
+
|
|
549
562
|
## Initial loop to make sure image IDs will be unique
|
|
550
|
-
|
|
563
|
+
|
|
551
564
|
print('Validating image IDs...')
|
|
552
|
-
|
|
565
|
+
|
|
553
566
|
for fn_abs in tqdm(image_files_abs):
|
|
554
|
-
|
|
567
|
+
|
|
555
568
|
fn_relative = os.path.relpath(fn_abs,input_folder).replace('\\','/')
|
|
556
569
|
image_id = _filename_to_image_id(fn_relative)
|
|
557
570
|
assert image_id not in image_ids, \
|
|
558
571
|
'Oops, you have hit a very esoteric case where you have the same filename ' + \
|
|
559
572
|
'with both spaces and underscores, this is not currently handled.'
|
|
560
573
|
image_ids.add(image_id)
|
|
561
|
-
|
|
562
|
-
|
|
574
|
+
|
|
575
|
+
|
|
563
576
|
## Main loop to process labels
|
|
564
|
-
|
|
577
|
+
|
|
565
578
|
print('Processing labels...')
|
|
566
|
-
|
|
579
|
+
|
|
567
580
|
if n_workers <= 1:
|
|
568
|
-
|
|
569
|
-
image_results = []
|
|
581
|
+
|
|
582
|
+
image_results = []
|
|
570
583
|
# fn_abs = image_files_abs[0]
|
|
571
|
-
for fn_abs in tqdm(image_files_abs):
|
|
584
|
+
for fn_abs in tqdm(image_files_abs):
|
|
572
585
|
image_results.append(_process_image(fn_abs,
|
|
573
586
|
input_folder,
|
|
574
587
|
category_id_to_name,
|
|
575
588
|
label_folder))
|
|
576
|
-
|
|
589
|
+
|
|
577
590
|
else:
|
|
578
|
-
|
|
591
|
+
|
|
579
592
|
assert pool_type in ('process','thread'), 'Illegal pool type {}'.format(pool_type)
|
|
580
|
-
|
|
593
|
+
|
|
581
594
|
if pool_type == 'thread':
|
|
582
595
|
pool = ThreadPool(n_workers)
|
|
583
596
|
else:
|
|
584
597
|
pool = Pool(n_workers)
|
|
585
|
-
|
|
598
|
+
|
|
586
599
|
print('Starting a {} pool of {} workers'.format(pool_type,n_workers))
|
|
587
|
-
|
|
600
|
+
|
|
588
601
|
p = partial(_process_image,
|
|
589
602
|
input_folder=input_folder,
|
|
590
603
|
category_id_to_name=category_id_to_name,
|
|
591
604
|
label_folder=label_folder)
|
|
592
605
|
image_results = list(tqdm(pool.imap(p, image_files_abs),
|
|
593
606
|
total=len(image_files_abs)))
|
|
594
|
-
|
|
595
|
-
|
|
607
|
+
|
|
608
|
+
|
|
596
609
|
assert len(image_results) == len(image_files_abs)
|
|
597
|
-
|
|
598
|
-
|
|
610
|
+
|
|
611
|
+
|
|
599
612
|
## Re-assembly of results into a COCO dict
|
|
600
|
-
|
|
613
|
+
|
|
601
614
|
print('Assembling labels...')
|
|
602
|
-
|
|
615
|
+
|
|
603
616
|
images = []
|
|
604
617
|
annotations = []
|
|
605
|
-
|
|
618
|
+
|
|
606
619
|
for image_result in tqdm(image_results):
|
|
607
|
-
|
|
620
|
+
|
|
608
621
|
im = image_result[0]
|
|
609
622
|
annotations_this_image = image_result[1]
|
|
610
|
-
|
|
623
|
+
|
|
611
624
|
# If we have annotations for this image
|
|
612
625
|
if len(annotations_this_image) > 0:
|
|
613
626
|
assert im['error'] is None
|
|
614
627
|
images.append(im)
|
|
615
628
|
for ann in annotations_this_image:
|
|
616
629
|
annotations.append(ann)
|
|
617
|
-
|
|
630
|
+
|
|
618
631
|
# If this image failed to read
|
|
619
632
|
elif im['error'] is not None:
|
|
620
|
-
|
|
633
|
+
|
|
621
634
|
if error_image_handling == 'skip':
|
|
622
635
|
pass
|
|
623
636
|
elif error_image_handling == 'no_annotations':
|
|
624
|
-
images.append(im)
|
|
625
|
-
|
|
637
|
+
images.append(im)
|
|
638
|
+
|
|
626
639
|
# If this image read successfully, but there are no annotations
|
|
627
640
|
else:
|
|
628
|
-
|
|
641
|
+
|
|
629
642
|
if empty_image_handling == 'skip':
|
|
630
643
|
pass
|
|
631
644
|
elif empty_image_handling == 'no_annotations':
|
|
@@ -641,13 +654,18 @@ def yolo_to_coco(input_folder,
|
|
|
641
654
|
# we're adopting.
|
|
642
655
|
# ann['bbox'] = [0,0,0,0]
|
|
643
656
|
annotations.append(ann)
|
|
644
|
-
images.append(im)
|
|
645
|
-
|
|
657
|
+
images.append(im)
|
|
658
|
+
|
|
646
659
|
# ...for each image result
|
|
647
|
-
|
|
660
|
+
|
|
661
|
+
# Clean up unnecessary error fields
|
|
662
|
+
for im in images:
|
|
663
|
+
if 'error' in im and im['error'] is None:
|
|
664
|
+
del im['error']
|
|
665
|
+
|
|
648
666
|
print('Read {} annotations for {} images'.format(len(annotations),
|
|
649
667
|
len(images)))
|
|
650
|
-
|
|
668
|
+
|
|
651
669
|
d = {}
|
|
652
670
|
d['images'] = images
|
|
653
671
|
d['annotations'] = annotations
|
|
@@ -667,25 +685,25 @@ def yolo_to_coco(input_folder,
|
|
|
667
685
|
#%% Interactive driver
|
|
668
686
|
|
|
669
687
|
if False:
|
|
670
|
-
|
|
688
|
+
|
|
671
689
|
pass
|
|
672
690
|
|
|
673
691
|
#%% Convert YOLO folders to COCO
|
|
674
|
-
|
|
692
|
+
|
|
675
693
|
preview_folder = '/home/user/data/noaa-fish/val-coco-conversion-preview'
|
|
676
694
|
input_folder = '/home/user/data/noaa-fish/val'
|
|
677
695
|
output_file = '/home/user/data/noaa-fish/val.json'
|
|
678
696
|
class_name_file = '/home/user/data/noaa-fish/AllImagesWithAnnotations/classes.txt'
|
|
679
697
|
|
|
680
698
|
d = yolo_to_coco(input_folder,class_name_file,output_file)
|
|
681
|
-
|
|
699
|
+
|
|
682
700
|
input_folder = '/home/user/data/noaa-fish/train'
|
|
683
701
|
output_file = '/home/user/data/noaa-fish/train.json'
|
|
684
702
|
class_name_file = '/home/user/data/noaa-fish/AllImagesWithAnnotations/classes.txt'
|
|
685
703
|
|
|
686
704
|
d = yolo_to_coco(input_folder,class_name_file,output_file)
|
|
687
|
-
|
|
688
|
-
|
|
705
|
+
|
|
706
|
+
|
|
689
707
|
#%% Check DB integrity
|
|
690
708
|
|
|
691
709
|
from megadetector.data_management.databases import integrity_check_json_db
|
|
@@ -715,11 +733,135 @@ if False:
|
|
|
715
733
|
output_dir=preview_folder,
|
|
716
734
|
image_base_dir=input_folder,
|
|
717
735
|
options=viz_options)
|
|
718
|
-
|
|
736
|
+
|
|
719
737
|
from megadetector.utils.path_utils import open_file
|
|
720
738
|
open_file(html_output_file)
|
|
721
739
|
|
|
722
740
|
|
|
723
741
|
#%% Command-line driver
|
|
724
742
|
|
|
725
|
-
|
|
743
|
+
def main():
    """
    Command-line driver for YOLO to COCO conversion.

    Builds the argument parser, reads arguments from sys.argv, and forwards
    them to yolo_to_coco().  When invoked with no arguments at all, prints the
    usage text and exits with status 0 instead of reporting missing arguments.

    The two boolean-valued options (--allow_images_without_label_files and
    --recursive) take the literal strings "true" or "false"; matching is
    case-insensitive, so "True" and "FALSE" are accepted as well.
    """

    parser = argparse.ArgumentParser(
        description='Convert a YOLO-formatted dataset to COCO format'
    )

    # Required positional arguments
    parser.add_argument(
        'input_folder',
        type=str,
        help='Path to the YOLO dataset folder (image folder)'
    )
    parser.add_argument(
        'class_name_file',
        type=str,
        help='Path to the file containing class names (e.g., classes.txt or dataset.yaml)'
    )
    parser.add_argument(
        'output_file',
        type=str,
        help='Path to the output COCO .json file.'
    )

    # Optional arguments; each is passed to the yolo_to_coco() keyword
    # argument of the same name.
    parser.add_argument(
        '--label_folder',
        type=str,
        default=None,
        help='Label folder, if different from the image folder. Default: None (labels are in the image folder)'
    )
    parser.add_argument(
        '--empty_image_handling',
        type=str,
        default='no_annotations',
        choices=['no_annotations', 'empty_annotations', 'skip', 'error'],
        help='How to handle images with no bounding boxes.'
    )
    parser.add_argument(
        '--empty_image_category_name',
        type=str,
        default='empty',
        help='Category name for empty images if empty_image_handling is "empty_annotations"'
    )
    parser.add_argument(
        '--error_image_handling',
        type=str,
        default='no_annotations',
        choices=['skip', 'no_annotations'],
        help='How to handle images that fail to load'
    )
    # argparse applies [type] before checking [choices], so lower-casing here
    # is what makes the true/false options case-insensitive.
    parser.add_argument(
        '--allow_images_without_label_files',
        type=str.lower,
        default='true',
        choices=['true', 'false'],
        help='Whether to allow images that do not have corresponding label files (true/false)'
    )
    parser.add_argument(
        '--n_workers',
        type=int,
        default=1,
        help='Number of workers for parallel processing. <=1 for sequential'
    )
    parser.add_argument(
        '--pool_type',
        type=str,
        default='thread',
        choices=['thread', 'process'],
        help='Type of multiprocessing pool if n_workers > 1'
    )
    parser.add_argument(
        '--recursive',
        type=str.lower,
        default='true',
        choices=['true', 'false'],
        help='Whether to search for images recursively in the input folder (true/false)'
    )
    parser.add_argument(
        '--exclude_string',
        type=str,
        default=None,
        help='Exclude images whose filename contains this string'
    )
    parser.add_argument(
        '--include_string',
        type=str,
        default=None,
        help='Include images only if filename contains this string'
    )
    parser.add_argument(
        '--overwrite_handling',
        type=str,
        default='overwrite',
        choices=['load', 'overwrite', 'error'],
        help='Behavior if output_file exists.'
    )

    # With no arguments, show the full help rather than an error about
    # missing positionals.
    if not sys.argv[1:]:
        parser.print_help()
        parser.exit()

    args = parser.parse_args()

    # Both values are already lower-cased by the parser
    parsed_allow_images = (args.allow_images_without_label_files == 'true')
    parsed_recursive = (args.recursive == 'true')

    yolo_to_coco(
        args.input_folder,
        args.class_name_file,
        output_file=args.output_file,
        label_folder=args.label_folder,
        empty_image_handling=args.empty_image_handling,
        empty_image_category_name=args.empty_image_category_name,
        error_image_handling=args.error_image_handling,
        allow_images_without_label_files=parsed_allow_images,
        n_workers=args.n_workers,
        pool_type=args.pool_type,
        recursive=parsed_recursive,
        exclude_string=args.exclude_string,
        include_string=args.include_string,
        overwrite_handling=args.overwrite_handling
    )
    print(f"Dataset conversion complete, output written to {args.output_file}")


if __name__ == '__main__':
    main()
|