megadetector 5.0.7__py3-none-any.whl → 5.0.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of megadetector might be problematic. Click here for more details.
- api/__init__.py +0 -0
- api/batch_processing/__init__.py +0 -0
- api/batch_processing/api_core/__init__.py +0 -0
- api/batch_processing/api_core/batch_service/__init__.py +0 -0
- api/batch_processing/api_core/batch_service/score.py +0 -1
- api/batch_processing/api_core/server_job_status_table.py +0 -1
- api/batch_processing/api_core_support/__init__.py +0 -0
- api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
- api/batch_processing/api_support/__init__.py +0 -0
- api/batch_processing/api_support/summarize_daily_activity.py +0 -1
- api/batch_processing/data_preparation/__init__.py +0 -0
- api/batch_processing/data_preparation/manage_local_batch.py +93 -79
- api/batch_processing/data_preparation/manage_video_batch.py +8 -8
- api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
- api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
- api/batch_processing/postprocessing/__init__.py +0 -0
- api/batch_processing/postprocessing/add_max_conf.py +12 -12
- api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
- api/batch_processing/postprocessing/combine_api_outputs.py +69 -55
- api/batch_processing/postprocessing/compare_batch_results.py +114 -44
- api/batch_processing/postprocessing/convert_output_format.py +62 -19
- api/batch_processing/postprocessing/load_api_results.py +17 -20
- api/batch_processing/postprocessing/md_to_coco.py +31 -21
- api/batch_processing/postprocessing/md_to_labelme.py +165 -68
- api/batch_processing/postprocessing/merge_detections.py +40 -15
- api/batch_processing/postprocessing/postprocess_batch_results.py +270 -186
- api/batch_processing/postprocessing/remap_detection_categories.py +170 -0
- api/batch_processing/postprocessing/render_detection_confusion_matrix.py +75 -39
- api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
- api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
- api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +244 -160
- api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
- api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
- api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
- api/synchronous/__init__.py +0 -0
- api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
- api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
- api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
- api/synchronous/api_core/animal_detection_api/config.py +35 -35
- api/synchronous/api_core/tests/__init__.py +0 -0
- api/synchronous/api_core/tests/load_test.py +109 -109
- classification/__init__.py +0 -0
- classification/aggregate_classifier_probs.py +21 -24
- classification/analyze_failed_images.py +11 -13
- classification/cache_batchapi_outputs.py +51 -51
- classification/create_classification_dataset.py +69 -68
- classification/crop_detections.py +54 -53
- classification/csv_to_json.py +97 -100
- classification/detect_and_crop.py +105 -105
- classification/evaluate_model.py +43 -42
- classification/identify_mislabeled_candidates.py +47 -46
- classification/json_to_azcopy_list.py +10 -10
- classification/json_validator.py +72 -71
- classification/map_classification_categories.py +44 -43
- classification/merge_classification_detection_output.py +68 -68
- classification/prepare_classification_script.py +157 -154
- classification/prepare_classification_script_mc.py +228 -228
- classification/run_classifier.py +27 -26
- classification/save_mislabeled.py +30 -30
- classification/train_classifier.py +20 -20
- classification/train_classifier_tf.py +21 -22
- classification/train_utils.py +10 -10
- data_management/__init__.py +0 -0
- data_management/annotations/__init__.py +0 -0
- data_management/annotations/annotation_constants.py +18 -31
- data_management/camtrap_dp_to_coco.py +238 -0
- data_management/cct_json_utils.py +107 -59
- data_management/cct_to_md.py +176 -158
- data_management/cct_to_wi.py +247 -219
- data_management/coco_to_labelme.py +272 -0
- data_management/coco_to_yolo.py +86 -62
- data_management/databases/__init__.py +0 -0
- data_management/databases/add_width_and_height_to_db.py +20 -16
- data_management/databases/combine_coco_camera_traps_files.py +35 -31
- data_management/databases/integrity_check_json_db.py +130 -83
- data_management/databases/subset_json_db.py +25 -16
- data_management/generate_crops_from_cct.py +27 -45
- data_management/get_image_sizes.py +188 -144
- data_management/importers/add_nacti_sizes.py +8 -8
- data_management/importers/add_timestamps_to_icct.py +78 -78
- data_management/importers/animl_results_to_md_results.py +158 -160
- data_management/importers/auckland_doc_test_to_json.py +9 -9
- data_management/importers/auckland_doc_to_json.py +8 -8
- data_management/importers/awc_to_json.py +7 -7
- data_management/importers/bellevue_to_json.py +15 -15
- data_management/importers/cacophony-thermal-importer.py +13 -13
- data_management/importers/carrizo_shrubfree_2018.py +8 -8
- data_management/importers/carrizo_trail_cam_2017.py +8 -8
- data_management/importers/cct_field_adjustments.py +9 -9
- data_management/importers/channel_islands_to_cct.py +10 -10
- data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
- data_management/importers/ena24_to_json.py +7 -7
- data_management/importers/filenames_to_json.py +8 -8
- data_management/importers/helena_to_cct.py +7 -7
- data_management/importers/idaho-camera-traps.py +7 -7
- data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
- data_management/importers/jb_csv_to_json.py +9 -9
- data_management/importers/mcgill_to_json.py +8 -8
- data_management/importers/missouri_to_json.py +18 -18
- data_management/importers/nacti_fieldname_adjustments.py +10 -10
- data_management/importers/noaa_seals_2019.py +8 -8
- data_management/importers/pc_to_json.py +7 -7
- data_management/importers/plot_wni_giraffes.py +7 -7
- data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
- data_management/importers/prepare_zsl_imerit.py +7 -7
- data_management/importers/rspb_to_json.py +8 -8
- data_management/importers/save_the_elephants_survey_A.py +8 -8
- data_management/importers/save_the_elephants_survey_B.py +9 -9
- data_management/importers/snapshot_safari_importer.py +26 -26
- data_management/importers/snapshot_safari_importer_reprise.py +665 -665
- data_management/importers/snapshot_serengeti_lila.py +14 -14
- data_management/importers/sulross_get_exif.py +8 -9
- data_management/importers/timelapse_csv_set_to_json.py +11 -11
- data_management/importers/ubc_to_json.py +13 -13
- data_management/importers/umn_to_json.py +7 -7
- data_management/importers/wellington_to_json.py +8 -8
- data_management/importers/wi_to_json.py +9 -9
- data_management/importers/zamba_results_to_md_results.py +181 -181
- data_management/labelme_to_coco.py +309 -159
- data_management/labelme_to_yolo.py +103 -60
- data_management/lila/__init__.py +0 -0
- data_management/lila/add_locations_to_island_camera_traps.py +9 -9
- data_management/lila/add_locations_to_nacti.py +147 -147
- data_management/lila/create_lila_blank_set.py +114 -31
- data_management/lila/create_lila_test_set.py +8 -8
- data_management/lila/create_links_to_md_results_files.py +106 -106
- data_management/lila/download_lila_subset.py +92 -90
- data_management/lila/generate_lila_per_image_labels.py +56 -43
- data_management/lila/get_lila_annotation_counts.py +18 -15
- data_management/lila/get_lila_image_counts.py +11 -11
- data_management/lila/lila_common.py +103 -70
- data_management/lila/test_lila_metadata_urls.py +132 -116
- data_management/ocr_tools.py +173 -128
- data_management/read_exif.py +161 -99
- data_management/remap_coco_categories.py +84 -0
- data_management/remove_exif.py +58 -62
- data_management/resize_coco_dataset.py +32 -44
- data_management/wi_download_csv_to_coco.py +246 -0
- data_management/yolo_output_to_md_output.py +86 -73
- data_management/yolo_to_coco.py +535 -95
- detection/__init__.py +0 -0
- detection/detector_training/__init__.py +0 -0
- detection/process_video.py +85 -33
- detection/pytorch_detector.py +43 -25
- detection/run_detector.py +157 -72
- detection/run_detector_batch.py +189 -114
- detection/run_inference_with_yolov5_val.py +118 -51
- detection/run_tiled_inference.py +113 -42
- detection/tf_detector.py +51 -28
- detection/video_utils.py +606 -521
- docs/source/conf.py +43 -0
- md_utils/__init__.py +0 -0
- md_utils/azure_utils.py +9 -9
- md_utils/ct_utils.py +249 -70
- md_utils/directory_listing.py +59 -64
- md_utils/md_tests.py +968 -862
- md_utils/path_utils.py +655 -155
- md_utils/process_utils.py +157 -133
- md_utils/sas_blob_utils.py +20 -20
- md_utils/split_locations_into_train_val.py +45 -32
- md_utils/string_utils.py +33 -10
- md_utils/url_utils.py +208 -27
- md_utils/write_html_image_list.py +51 -35
- md_visualization/__init__.py +0 -0
- md_visualization/plot_utils.py +102 -109
- md_visualization/render_images_with_thumbnails.py +34 -34
- md_visualization/visualization_utils.py +908 -311
- md_visualization/visualize_db.py +109 -58
- md_visualization/visualize_detector_output.py +61 -42
- {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/METADATA +21 -17
- megadetector-5.0.9.dist-info/RECORD +224 -0
- {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/WHEEL +1 -1
- {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
- taxonomy_mapping/__init__.py +0 -0
- taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
- taxonomy_mapping/map_new_lila_datasets.py +154 -154
- taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
- taxonomy_mapping/preview_lila_taxonomy.py +591 -591
- taxonomy_mapping/retrieve_sample_image.py +12 -12
- taxonomy_mapping/simple_image_download.py +11 -11
- taxonomy_mapping/species_lookup.py +10 -10
- taxonomy_mapping/taxonomy_csv_checker.py +18 -18
- taxonomy_mapping/taxonomy_graph.py +47 -47
- taxonomy_mapping/validate_lila_category_mappings.py +83 -76
- data_management/cct_json_to_filename_json.py +0 -89
- data_management/cct_to_csv.py +0 -140
- data_management/databases/remove_corrupted_images_from_db.py +0 -191
- detection/detector_training/copy_checkpoints.py +0 -43
- md_visualization/visualize_megadb.py +0 -183
- megadetector-5.0.7.dist-info/RECORD +0 -202
- {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0
|
@@ -0,0 +1,272 @@
|
|
|
1
|
+
"""
|
|
2
|
+
|
|
3
|
+
coco_to_labelme.py
|
|
4
|
+
|
|
5
|
+
Converts a COCO dataset to labelme format (one .json per image file).
|
|
6
|
+
|
|
7
|
+
If you want to convert YOLO-formatted data to labelme format, use yolo_to_coco, then
|
|
8
|
+
coco_to_labelme.
|
|
9
|
+
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
#%% Imports and constants
|
|
13
|
+
|
|
14
|
+
import os
|
|
15
|
+
import json
|
|
16
|
+
|
|
17
|
+
from tqdm import tqdm
|
|
18
|
+
from collections import defaultdict
|
|
19
|
+
|
|
20
|
+
from md_visualization.visualization_utils import open_image
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
#%% Functions
|
|
24
|
+
|
|
25
|
+
def get_labelme_dict_for_image_from_coco_record(im,annotations,categories,info=None):
    """
    For the given image struct in COCO format and associated list of annotations, reformats the
    detections into labelme format.

    Args:
        im (dict): image dict, as loaded from a COCO .json file; 'file_name', 'height', and
            'width' are required, 'id' is compared against each annotation's 'image_id', and
            'flags' (if present) is passed through to the output
        annotations (list): a list of annotations that refer to this image (this function errors if
            that's not the case); annotations without a 'bbox' field are ignored
        categories (list): a list of category dicts in COCO format ({'id':x,'name':'s'})
        info (dict, optional): a dict to store in a non-standard "custom_info" field in the output

    Returns:
        dict: a dict in labelme format, suitable for writing to a labelme .json file
    """

    # labelme stores only the file name, not the relative path
    image_base_name = os.path.basename(im['file_name'])

    output_dict = {}
    if info is not None:
        output_dict['custom_info'] = info
    output_dict['version'] = '5.3.0a0'
    output_dict['flags'] = im['flags'] if 'flags' in im else {}
    output_dict['shapes'] = []
    output_dict['imagePath'] = image_base_name
    output_dict['imageHeight'] = im['height']
    output_dict['imageWidth'] = im['width']
    output_dict['imageData'] = None

    # Store COCO categories in case we want to reconstruct the original IDs later
    output_dict['coco_categories'] = categories

    category_id_to_name = {c['id']:c['name'] for c in categories}

    # ann = annotations[0]
    for ann in annotations:

        # Use .get() for the annotation ID so an annotation without an 'id' field still
        # produces the intended assertion message, rather than a KeyError raised while
        # formatting that message.
        assert ann['image_id'] == im['id'], 'Annotation {} does not refer to image {}'.format(
            ann.get('id'),im['id'])

        # Only box annotations can be represented as labelme rectangles
        if 'bbox' not in ann:
            continue

        shape = {}
        shape['label'] = category_id_to_name[ann['category_id']]
        shape['shape_type'] = 'rectangle'
        shape['description'] = ''
        shape['group_id'] = None

        # COCO boxes are [x_min, y_min, width_of_box, height_of_box] (absolute)
        #
        # labelme boxes are [[x0,y0],[x1,y1]] (absolute)
        x0 = ann['bbox'][0]
        y0 = ann['bbox'][1]
        x1 = ann['bbox'][0] + ann['bbox'][2]
        y1 = ann['bbox'][1] + ann['bbox'][3]

        shape['points'] = [[x0,y0],[x1,y1]]
        output_dict['shapes'].append(shape)

    # ...for each annotation

    return output_dict

# ...def get_labelme_dict_for_image_from_coco_record()
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def coco_to_labelme(coco_data,image_base,overwrite=False,bypass_image_size_check=False,verbose=False):
    """
    For all the images in [coco_data] (a dict or a filename), write a .json file in
    labelme format alongside the corresponding relative path within image_base.

    Image records in [coco_data] are modified in place when sizes are read: missing
    'width'/'height' fields are filled in, and images that can't be opened are marked
    with a 'failure' field.

    Args:
        coco_data (dict or str): a COCO-formatted dict, or the name of a .json file
            containing one
        image_base (str): the folder that each image's 'file_name' is relative to; labelme
            .json files are written next to the images
        overwrite (bool, optional): whether to overwrite labelme .json files that already
            exist; if False, existing files are skipped
        bypass_image_size_check (bool, optional): skip the pass that verifies that every
            image file exists and reads missing 'width'/'height' values from the image
            files; every image record must then already contain 'height' and 'width'
        verbose (bool, optional): print a message for each image that is skipped
    """

    # Load COCO data if necessary
    if isinstance(coco_data,str):
        with open(coco_data,'r') as f:
            coco_data = json.load(f)
    assert isinstance(coco_data,dict)


    ## Read image sizes if necessary

    if bypass_image_size_check:

        print('Bypassing size check')

    else:

        # TODO: parallelize this loop

        print('Reading/validating image sizes...')

        # im = coco_data['images'][0]
        for im in tqdm(coco_data['images']):

            # Make sure this file exists
            im_full_path = os.path.join(image_base,im['file_name'])
            assert os.path.isfile(im_full_path), 'Image file {} does not exist'.format(im_full_path)

            # Load w/h information if necessary
            if 'height' not in im or 'width' not in im:

                try:
                    pil_im = open_image(im_full_path)
                    im['width'] = pil_im.width
                    im['height'] = pil_im.height
                except Exception:
                    print('Warning: cannot open image {}'.format(im_full_path))
                    # Record the failure unless a non-None failure is already stored.  A
                    # 'failure' key that is present but None has to be overwritten too: the
                    # output loop below treats None as "not failed", and would then try to
                    # convert an image that has no width/height.
                    if im.get('failure') is None:
                        im['failure'] = 'Failure image access'

            # ...if we need to read w/h information

        # ...for each image

    # ...if we need to load image sizes


    ## Generate labelme files

    print('Generating .json files...')

    # Group annotations by image, so each image's annotations are a single lookup
    image_id_to_annotations = defaultdict(list)
    for ann in coco_data['annotations']:
        image_id_to_annotations[ann['image_id']].append(ann)

    n_json_files_written = 0
    n_json_files_error = 0
    n_json_files_exist = 0

    # Write output
    for im in tqdm(coco_data['images']):

        # Skip this image if it failed to load in whatever system generated this COCO file
        skip_image = False

        # Errors are represented differently depending on the source
        for error_string in ('failure','error'):
            if (error_string in im) and (im[error_string] is not None):
                if verbose:
                    print('Warning: skipping labelme file generation for failed image {}'.format(
                        im['file_name']))
                skip_image = True
                n_json_files_error += 1
                break
        if skip_image:
            continue

        # The labelme file has the same path as the image, with a .json extension
        im_full_path = os.path.join(image_base,im['file_name'])
        json_path = os.path.splitext(im_full_path)[0] + '.json'

        if (not overwrite) and (os.path.isfile(json_path)):
            if verbose:
                print('Skipping existing file {}'.format(json_path))
            n_json_files_exist += 1
            continue

        annotations_this_image = image_id_to_annotations[im['id']]
        output_dict = get_labelme_dict_for_image_from_coco_record(im,
                                                                  annotations_this_image,
                                                                  coco_data['categories'],
                                                                  info=None)

        with open(json_path,'w') as f:
            json.dump(output_dict,f,indent=1)

        # Count the file only after it has actually been written
        n_json_files_written += 1

    # ...for each image

    print('\nWrote {} .json files (skipped {} for errors, {} because they exist)'.format(
        n_json_files_written,n_json_files_error,n_json_files_exist))

# ...def coco_to_labelme()
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
#%% Interactive driver
|
|
204
|
+
|
|
205
|
+
# Scratch cells for interactive (notebook-style) use.  Everything below stays nested
# under "if False:" so that none of it runs when this module is imported or executed.
if False:

    pass

    #%% Configure options

    coco_file = \
        r'C:\\temp\\snapshot-exploration\\images\\training-images-good\\training-images-good_from_yolo.json'
    image_folder = os.path.dirname(coco_file)
    overwrite = True


    #%% Programmatic execution

    coco_to_labelme(coco_data=coco_file,image_base=image_folder,overwrite=overwrite)


    #%% Command-line execution

    # Build the equivalent command line, print it, and copy it to the clipboard
    s = 'python coco_to_labelme.py "{}" "{}"'.format(coco_file,image_folder)
    if overwrite:
        s += ' --overwrite'

    print(s)
    import clipboard; clipboard.copy(s)


    #%% Opening labelme

    # Build the command that opens labelme on the output folder
    s = 'python labelme {}'.format(image_folder)
    print(s)
    import clipboard; clipboard.copy(s)
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
#%% Command-line driver
|
|
240
|
+
|
|
241
|
+
import sys,argparse
|
|
242
|
+
|
|
243
|
+
def main():
    """
    Command-line entry point: parses the COCO file, image folder, and --overwrite flag
    from sys.argv and runs coco_to_labelme().  Prints usage and exits if no arguments
    are supplied.
    """

    parser = argparse.ArgumentParser(
        description='Convert a COCO database to labelme annotation format')

    parser.add_argument(
        'coco_file',
        type=str,
        help='Path to COCO data file (.json)')

    parser.add_argument(
        'image_base',
        type=str,
        help='Path to images (also the output folder)')

    parser.add_argument(
        '--overwrite',
        action='store_true',
        help='Overwrite existing labelme .json files')

    # With no arguments at all, show the full help text rather than argparse's terse
    # "missing required arguments" error
    if len(sys.argv[1:]) == 0:
        parser.print_help()
        parser.exit()

    args = parser.parse_args()

    coco_to_labelme(coco_data=args.coco_file,image_base=args.image_base,overwrite=args.overwrite)

if __name__ == '__main__':
    main()
|
data_management/coco_to_yolo.py
CHANGED
|
@@ -1,20 +1,20 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
1
|
+
"""
|
|
2
|
+
|
|
3
|
+
coco_to_yolo.py
|
|
4
|
+
|
|
5
|
+
Converts a COCO-formatted dataset to a YOLO-formatted dataset, flattening
|
|
6
|
+
the dataset (to a single folder) in the process.
|
|
7
|
+
|
|
8
|
+
If the input and output folders are the same, writes .txt files to the input folder,
|
|
9
|
+
and neither moves nor modifies images.
|
|
10
|
+
|
|
11
|
+
Currently ignores segmentation masks, and errors if an annotation has a
|
|
12
|
+
segmentation polygon but no bbox.
|
|
13
|
+
|
|
14
|
+
Has only been tested on a handful of COCO Camera Traps data sets; if you
|
|
15
|
+
use it for more general COCO conversion, YMMV.
|
|
16
|
+
|
|
17
|
+
"""
|
|
18
18
|
|
|
19
19
|
#%% Imports and constants
|
|
20
20
|
|
|
@@ -37,16 +37,16 @@ def write_yolo_dataset_file(yolo_dataset_file,
|
|
|
37
37
|
val_folder_relative=None,
|
|
38
38
|
test_folder_relative=None):
|
|
39
39
|
"""
|
|
40
|
-
Write a YOLOv5 dataset.yaml file to the absolute path yolo_dataset_file (should
|
|
40
|
+
Write a YOLOv5 dataset.yaml file to the absolute path [yolo_dataset_file] (should
|
|
41
41
|
have a .yaml extension, though it's only a warning if it doesn't).
|
|
42
|
-
|
|
43
|
-
[dataset_base_dir] should be the absolute path of the dataset root.
|
|
44
42
|
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
43
|
+
Args:
|
|
44
|
+
yolo_dataset_file (str): the file, typically ending in .yaml or .yml, to write.
|
|
45
|
+
Does not have to be within dataset_base_dir.
|
|
46
|
+
dataset_base_dir (str): the absolute base path of the YOLO dataset
|
|
47
|
+
class_list (list or str): an ordered list of class names (the first item will be class 0,
|
|
48
|
+
etc.), or the name of a text file containing an ordered list of class names (one per
|
|
49
|
+
line, starting from class zero).
|
|
50
50
|
"""
|
|
51
51
|
|
|
52
52
|
# Read class names
|
|
@@ -56,6 +56,10 @@ def write_yolo_dataset_file(yolo_dataset_file,
|
|
|
56
56
|
class_lines = [s.strip() for s in class_lines]
|
|
57
57
|
class_list = [s for s in class_lines if len(s) > 0]
|
|
58
58
|
|
|
59
|
+
if not (yolo_dataset_file.endswith('.yml') or yolo_dataset_file.endswith('.yaml')):
|
|
60
|
+
print('Warning: writing dataset file to a non-yml/yaml extension:\n{}'.format(
|
|
61
|
+
yolo_dataset_file))
|
|
62
|
+
|
|
59
63
|
# Write dataset.yaml
|
|
60
64
|
with open(yolo_dataset_file,'w') as f:
|
|
61
65
|
|
|
@@ -78,7 +82,9 @@ def write_yolo_dataset_file(yolo_dataset_file,
|
|
|
78
82
|
# ...def write_yolo_dataset_file(...)
|
|
79
83
|
|
|
80
84
|
|
|
81
|
-
def coco_to_yolo(input_image_folder,
|
|
85
|
+
def coco_to_yolo(input_image_folder,
|
|
86
|
+
output_folder,
|
|
87
|
+
input_file,
|
|
82
88
|
source_format='coco',
|
|
83
89
|
overwrite_images=False,
|
|
84
90
|
create_image_and_label_folders=False,
|
|
@@ -93,7 +99,7 @@ def coco_to_yolo(input_image_folder,output_folder,input_file,
|
|
|
93
99
|
write_output=True,
|
|
94
100
|
flatten_paths=True):
|
|
95
101
|
"""
|
|
96
|
-
|
|
102
|
+
Converts a COCO-formatted dataset to a YOLO-formatted dataset, optionally flattening the
|
|
97
103
|
dataset to a single folder in the process.
|
|
98
104
|
|
|
99
105
|
If the input and output folders are the same, writes .txt files to the input folder,
|
|
@@ -102,32 +108,51 @@ def coco_to_yolo(input_image_folder,output_folder,input_file,
|
|
|
102
108
|
Currently ignores segmentation masks, and errors if an annotation has a
|
|
103
109
|
segmentation polygon but no bbox.
|
|
104
110
|
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
111
|
+
Args:
|
|
112
|
+
input_image_folder (str): the folder where images live; filenames in the COCO .json
|
|
113
|
+
file [input_file] should be relative to this folder
|
|
114
|
+
output_folder (str): the base folder for the YOLO dataset
|
|
115
|
+
input_file (str): a .json file in COCO format; can be the same as [input_image_folder], in which case
|
|
116
|
+
images are left alone.
|
|
117
|
+
source_format (str, optional): can be 'coco' (default) or 'coco_camera_traps'. The only difference
|
|
118
|
+
is that when source_format is 'coco_camera_traps', we treat an image with a non-bbox
|
|
119
|
+
annotation with a category id of 0 as a special case, i.e. that's how an empty image
|
|
120
|
+
is indicated. The original COCO standard is a little ambiguous on this issue. If
|
|
121
|
+
source_format is 'coco', we either treat images as empty or error, depending on the value
|
|
122
|
+
of [allow_empty_annotations]. [allow_empty_annotations] has no effect if source_format is
|
|
123
|
+
'coco_camera_traps'.
|
|
124
|
+
create_image_and_label_folders (bool, optional): whether to create separate folders called 'images' and
|
|
125
|
+
'labels' in the YOLO output folder. If create_image_and_label_folders is False,
|
|
126
|
+
a/b/c/image001.jpg will become a#b#c#image001.jpg, and the corresponding text file will
|
|
127
|
+
be a#b#c#image001.txt. If create_image_and_label_folders is True, a/b/c/image001.jpg will become
|
|
128
|
+
images/a#b#c#image001.jpg, and the corresponding text file will be
|
|
129
|
+
labels/a#b#c#image001.txt.
|
|
130
|
+
clip_boxes (bool, optional): whether to clip bounding box coordinates to the range [0,1] before
|
|
131
|
+
converting to YOLO xywh format
|
|
132
|
+
image_id_to_output_image_json_file (str, optional): an optional *output* file, to which we will write
|
|
133
|
+
a mapping from image IDs to output file names
|
|
134
|
+
images_to_exclude (list, optional): a list of image files (relative paths in the input folder) that we
|
|
135
|
+
should ignore
|
|
136
|
+
path_replacement_char (str, optional): only relevant if [flatten_paths] is True; this is used to replace
|
|
137
|
+
path separators, e.g. if [path_replacement_char] is '#' and [flatten_paths] is True, a/b/c/d.jpg
|
|
138
|
+
becomes a#b#c#d.jpg
|
|
139
|
+
category_names_to_exclude (str, optional): category names that should not be represented in the
|
|
140
|
+
YOLO output; only impacts annotations, does not prevent copying images. There's almost no reason
|
|
141
|
+
you would want to specify this and [category_names_to_include].
|
|
142
|
+
category_names_to_include (str, optional): allow-list of category names that should be represented in the
|
|
143
|
+
YOLO output; only impacts annotations, does not prevent copying images. There's almost no reason
|
|
144
|
+
you would want to specify this and [category_names_to_exclude].
|
|
145
|
+
write_output (bool, optional): determines whether we actually copy images and write annotations;
|
|
146
|
+
setting this to False mostly puts this function in "dry run" mode. The class list
|
|
147
|
+
file is written regardless of the value of write_output.
|
|
148
|
+
|
|
149
|
+
Returns:
|
|
150
|
+
dict: information about the coco --> yolo mapping, containing at least the fields:
|
|
151
|
+
|
|
152
|
+
- class_list_filename: the filename to which we wrote the flat list of class names required
|
|
153
|
+
by the YOLO format.
|
|
154
|
+
- source_image_to_dest_image: a dict mapping source images to destination images
|
|
155
|
+
- coco_id_to_yolo_id: a dict mapping COCO category IDs to YOLO category IDs
|
|
131
156
|
"""
|
|
132
157
|
|
|
133
158
|
## Validate input
|
|
@@ -189,7 +214,6 @@ def coco_to_yolo(input_image_folder,output_folder,input_file,
|
|
|
189
214
|
coco_id_to_name = {}
|
|
190
215
|
yolo_id_to_name = {}
|
|
191
216
|
coco_category_ids_to_exclude = set()
|
|
192
|
-
category_exclusion_warnings_printed = set()
|
|
193
217
|
|
|
194
218
|
for category in data['categories']:
|
|
195
219
|
coco_id_to_name[category['id']] = category['name']
|
|
@@ -465,9 +489,9 @@ def coco_to_yolo(input_image_folder,output_folder,input_file,
|
|
|
465
489
|
#
|
|
466
490
|
# https://github.com/ultralytics/yolov5/issues/3218
|
|
467
491
|
#
|
|
468
|
-
# I think this is also true for images with empty
|
|
469
|
-
# I'm using the convention suggested on that issue, i.e. hard
|
|
470
|
-
# are expressed as images without .txt files.
|
|
492
|
+
# I think this is also true for images with empty .txt files, but
|
|
493
|
+
# I'm using the convention suggested on that issue, i.e. hard
|
|
494
|
+
# negatives are expressed as images without .txt files.
|
|
471
495
|
if len(bboxes) > 0:
|
|
472
496
|
|
|
473
497
|
with open(dest_txt,'w') as f:
|
|
@@ -497,12 +521,12 @@ def create_yolo_symlinks(source_folder,images_folder,labels_folder,
|
|
|
497
521
|
class_list_output_name='object.data',
|
|
498
522
|
force_lowercase_image_extension=False):
|
|
499
523
|
"""
|
|
500
|
-
Given a YOLO-formatted folder of images and .txt files,
|
|
524
|
+
Given a YOLO-formatted folder of images and .txt files, creates a folder
|
|
501
525
|
of symlinks to all the images, and a folder of symlinks to all the labels.
|
|
502
|
-
Used to support preview/editing tools
|
|
503
|
-
|
|
526
|
+
Used to support preview/editing tools that assume images and labels are in separate
|
|
527
|
+
folders.
|
|
504
528
|
|
|
505
|
-
|
|
529
|
+
:meta private:
|
|
506
530
|
"""
|
|
507
531
|
|
|
508
532
|
assert source_folder != images_folder and source_folder != labels_folder
|
|
@@ -616,7 +640,7 @@ def main():
|
|
|
616
640
|
parser.add_argument(
|
|
617
641
|
'--create_bounding_box_editor_symlinks',
|
|
618
642
|
action='store_true',
|
|
619
|
-
help='Prepare symlinks so the whole folder
|
|
643
|
+
help='Prepare symlinks so the whole folder appears to contain "images" and "labels" folderss')
|
|
620
644
|
|
|
621
645
|
if len(sys.argv[1:]) == 0:
|
|
622
646
|
parser.print_help()
|
|
File without changes
|
|
@@ -1,10 +1,12 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
1
|
+
"""
|
|
2
|
+
|
|
3
|
+
add_width_and_height_to_db.py
|
|
4
|
+
|
|
5
|
+
Grabs width and height from actual image files for a .json database that is missing w/h.
|
|
6
|
+
|
|
7
|
+
TODO: this is a one-off script waiting to be cleaned up for more general use.
|
|
8
|
+
|
|
9
|
+
"""
|
|
8
10
|
|
|
9
11
|
#%% Imports and constants
|
|
10
12
|
|
|
@@ -14,16 +16,18 @@ from PIL import Image
|
|
|
14
16
|
datafile = '/datadrive/snapshotserengeti/databases/snapshotserengeti.json'
|
|
15
17
|
image_base = '/datadrive/snapshotserengeti/images/'
|
|
16
18
|
|
|
19
|
+
def main():
    """
    Loads the .json database [datafile], reads the size of each image that has no
    'height' field from the corresponding file under [image_base], and writes the updated
    database back to [datafile] (overwriting it in place).
    """

    with open(datafile,'r') as f:
        data = json.load(f)

    for im in data['images']:
        if 'height' not in im:
            # Open the image in a context manager so its file handle is released right
            # away, rather than being left for garbage collection
            with Image.open(image_base+im['file_name']) as pil_im:
                im_w, im_h = pil_im.size
            im['height'] = im_h
            im['width'] = im_w

    # Write through a context manager so the output is flushed and closed deterministically
    with open(datafile,'w') as f:
        json.dump(data, f)
|
|
28
31
|
|
|
29
|
-
|
|
32
|
+
if __name__ == '__main__':
|
|
33
|
+
main()
|
|
@@ -1,17 +1,19 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
1
|
+
"""
|
|
2
|
+
|
|
3
|
+
combine_coco_camera_traps_files.py
|
|
4
|
+
|
|
5
|
+
Merges two or more .json files in COCO Camera Traps format, optionally
|
|
6
|
+
writing the results to another .json file.
|
|
7
|
+
|
|
8
|
+
- Concatenates image lists, erroring if images are not unique.
|
|
9
|
+
- Errors on unrecognized fields.
|
|
10
|
+
- Checks compatibility in info structs, within reason.
|
|
11
|
+
|
|
12
|
+
*Example command-line invocation*
|
|
13
|
+
|
|
14
|
+
combine_coco_camera_traps_files input1.json input2.json ... inputN.json output.json
|
|
15
|
+
|
|
16
|
+
"""
|
|
15
17
|
|
|
16
18
|
#%% Constants and imports
|
|
17
19
|
|
|
@@ -19,26 +21,25 @@ import argparse
|
|
|
19
21
|
import json
|
|
20
22
|
import sys
|
|
21
23
|
|
|
22
|
-
from typing import Any, Dict, Iterable, Mapping, List, Optional
|
|
23
|
-
|
|
24
24
|
|
|
25
25
|
#%% Merge functions
|
|
26
26
|
|
|
27
|
-
def combine_cct_files(input_files
|
|
28
|
-
|
|
29
|
-
require_uniqueness: Optional[bool] = True,
|
|
30
|
-
filename_prefixes: Optional[dict] = None
|
|
31
|
-
) -> Dict[str, Any]:
|
|
27
|
+
def combine_cct_files(input_files, output_file=None, require_uniqueness=True,
|
|
28
|
+
filename_prefixes=None):
|
|
32
29
|
"""
|
|
33
|
-
Merges list of COCO Camera Traps files
|
|
34
|
-
dictionary, optionally writing the result to
|
|
30
|
+
Merges the list of COCO Camera Traps files [input_files] into a single
|
|
31
|
+
dictionary, optionally writing the result to [output_file].
|
|
35
32
|
|
|
36
33
|
Args:
|
|
37
|
-
input_files
|
|
38
|
-
output_file
|
|
39
|
-
require_uniqueness
|
|
34
|
+
input_files (list): paths to CCT .json files
|
|
35
|
+
output_file (str, optional): path to write merged .json file
|
|
36
|
+
require_uniqueness (bool): whether to require that the images in
|
|
40
37
|
each input_dict be unique
|
|
38
|
+
|
|
39
|
+
Returns:
|
|
40
|
+
dict: the merged COCO-formatted .json dict
|
|
41
41
|
"""
|
|
42
|
+
|
|
42
43
|
input_dicts = []
|
|
43
44
|
print('Loading input files')
|
|
44
45
|
for fn in input_files:
|
|
@@ -61,11 +62,9 @@ def combine_cct_files(input_files: List[str],
|
|
|
61
62
|
return merged_dict
|
|
62
63
|
|
|
63
64
|
|
|
64
|
-
def combine_cct_dictionaries(input_dicts
|
|
65
|
-
require_uniqueness: Optional[bool] = True
|
|
66
|
-
) -> Dict[str, Any]:
|
|
65
|
+
def combine_cct_dictionaries(input_dicts, require_uniqueness=True):
|
|
67
66
|
"""
|
|
68
|
-
Merges the list of COCO Camera Traps dictionaries
|
|
67
|
+
Merges the list of COCO Camera Traps dictionaries [input_dicts]. See module header
|
|
69
68
|
comment for details on merge rules.
|
|
70
69
|
|
|
71
70
|
Args:
|
|
@@ -73,7 +72,8 @@ def combine_cct_dictionaries(input_dicts: Iterable[Mapping[str, Any]],
|
|
|
73
72
|
require_uniqueness: bool, whether to require that the images in
|
|
74
73
|
each input_dict be unique
|
|
75
74
|
|
|
76
|
-
Returns:
|
|
75
|
+
Returns:
|
|
76
|
+
dict: the merged COCO-formatted .json dict
|
|
77
77
|
"""
|
|
78
78
|
|
|
79
79
|
filename_to_image = {}
|
|
@@ -177,12 +177,16 @@ def combine_cct_dictionaries(input_dicts: Iterable[Mapping[str, Any]],
|
|
|
177
177
|
'categories': all_categories,
|
|
178
178
|
'images': sorted_images,
|
|
179
179
|
'annotations': all_annotations}
|
|
180
|
+
|
|
180
181
|
return merged_dict
|
|
181
182
|
|
|
183
|
+
# ...combine_cct_dictionaries(...)
|
|
184
|
+
|
|
182
185
|
|
|
183
186
|
#%% Command-line driver
|
|
184
187
|
|
|
185
188
|
def main():
|
|
189
|
+
|
|
186
190
|
parser = argparse.ArgumentParser()
|
|
187
191
|
parser.add_argument(
|
|
188
192
|
'input_paths', nargs='+',
|