megadetector 10.0.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (147) hide show
  1. megadetector/__init__.py +0 -0
  2. megadetector/api/__init__.py +0 -0
  3. megadetector/api/batch_processing/integration/digiKam/setup.py +6 -0
  4. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +465 -0
  5. megadetector/api/batch_processing/integration/eMammal/test_scripts/config_template.py +5 -0
  6. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +125 -0
  7. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +55 -0
  8. megadetector/classification/__init__.py +0 -0
  9. megadetector/classification/aggregate_classifier_probs.py +108 -0
  10. megadetector/classification/analyze_failed_images.py +227 -0
  11. megadetector/classification/cache_batchapi_outputs.py +198 -0
  12. megadetector/classification/create_classification_dataset.py +626 -0
  13. megadetector/classification/crop_detections.py +516 -0
  14. megadetector/classification/csv_to_json.py +226 -0
  15. megadetector/classification/detect_and_crop.py +853 -0
  16. megadetector/classification/efficientnet/__init__.py +9 -0
  17. megadetector/classification/efficientnet/model.py +415 -0
  18. megadetector/classification/efficientnet/utils.py +608 -0
  19. megadetector/classification/evaluate_model.py +520 -0
  20. megadetector/classification/identify_mislabeled_candidates.py +152 -0
  21. megadetector/classification/json_to_azcopy_list.py +63 -0
  22. megadetector/classification/json_validator.py +696 -0
  23. megadetector/classification/map_classification_categories.py +276 -0
  24. megadetector/classification/merge_classification_detection_output.py +509 -0
  25. megadetector/classification/prepare_classification_script.py +194 -0
  26. megadetector/classification/prepare_classification_script_mc.py +228 -0
  27. megadetector/classification/run_classifier.py +287 -0
  28. megadetector/classification/save_mislabeled.py +110 -0
  29. megadetector/classification/train_classifier.py +827 -0
  30. megadetector/classification/train_classifier_tf.py +725 -0
  31. megadetector/classification/train_utils.py +323 -0
  32. megadetector/data_management/__init__.py +0 -0
  33. megadetector/data_management/animl_to_md.py +161 -0
  34. megadetector/data_management/annotations/__init__.py +0 -0
  35. megadetector/data_management/annotations/annotation_constants.py +33 -0
  36. megadetector/data_management/camtrap_dp_to_coco.py +270 -0
  37. megadetector/data_management/cct_json_utils.py +566 -0
  38. megadetector/data_management/cct_to_md.py +184 -0
  39. megadetector/data_management/cct_to_wi.py +293 -0
  40. megadetector/data_management/coco_to_labelme.py +284 -0
  41. megadetector/data_management/coco_to_yolo.py +702 -0
  42. megadetector/data_management/databases/__init__.py +0 -0
  43. megadetector/data_management/databases/add_width_and_height_to_db.py +107 -0
  44. megadetector/data_management/databases/combine_coco_camera_traps_files.py +210 -0
  45. megadetector/data_management/databases/integrity_check_json_db.py +528 -0
  46. megadetector/data_management/databases/subset_json_db.py +195 -0
  47. megadetector/data_management/generate_crops_from_cct.py +200 -0
  48. megadetector/data_management/get_image_sizes.py +164 -0
  49. megadetector/data_management/labelme_to_coco.py +559 -0
  50. megadetector/data_management/labelme_to_yolo.py +349 -0
  51. megadetector/data_management/lila/__init__.py +0 -0
  52. megadetector/data_management/lila/create_lila_blank_set.py +556 -0
  53. megadetector/data_management/lila/create_lila_test_set.py +187 -0
  54. megadetector/data_management/lila/create_links_to_md_results_files.py +106 -0
  55. megadetector/data_management/lila/download_lila_subset.py +182 -0
  56. megadetector/data_management/lila/generate_lila_per_image_labels.py +777 -0
  57. megadetector/data_management/lila/get_lila_annotation_counts.py +174 -0
  58. megadetector/data_management/lila/get_lila_image_counts.py +112 -0
  59. megadetector/data_management/lila/lila_common.py +319 -0
  60. megadetector/data_management/lila/test_lila_metadata_urls.py +164 -0
  61. megadetector/data_management/mewc_to_md.py +344 -0
  62. megadetector/data_management/ocr_tools.py +873 -0
  63. megadetector/data_management/read_exif.py +964 -0
  64. megadetector/data_management/remap_coco_categories.py +195 -0
  65. megadetector/data_management/remove_exif.py +156 -0
  66. megadetector/data_management/rename_images.py +194 -0
  67. megadetector/data_management/resize_coco_dataset.py +663 -0
  68. megadetector/data_management/speciesnet_to_md.py +41 -0
  69. megadetector/data_management/wi_download_csv_to_coco.py +247 -0
  70. megadetector/data_management/yolo_output_to_md_output.py +594 -0
  71. megadetector/data_management/yolo_to_coco.py +876 -0
  72. megadetector/data_management/zamba_to_md.py +188 -0
  73. megadetector/detection/__init__.py +0 -0
  74. megadetector/detection/change_detection.py +840 -0
  75. megadetector/detection/process_video.py +479 -0
  76. megadetector/detection/pytorch_detector.py +1451 -0
  77. megadetector/detection/run_detector.py +1267 -0
  78. megadetector/detection/run_detector_batch.py +2159 -0
  79. megadetector/detection/run_inference_with_yolov5_val.py +1314 -0
  80. megadetector/detection/run_md_and_speciesnet.py +1494 -0
  81. megadetector/detection/run_tiled_inference.py +1038 -0
  82. megadetector/detection/tf_detector.py +209 -0
  83. megadetector/detection/video_utils.py +1379 -0
  84. megadetector/postprocessing/__init__.py +0 -0
  85. megadetector/postprocessing/add_max_conf.py +72 -0
  86. megadetector/postprocessing/categorize_detections_by_size.py +166 -0
  87. megadetector/postprocessing/classification_postprocessing.py +1752 -0
  88. megadetector/postprocessing/combine_batch_outputs.py +249 -0
  89. megadetector/postprocessing/compare_batch_results.py +2110 -0
  90. megadetector/postprocessing/convert_output_format.py +403 -0
  91. megadetector/postprocessing/create_crop_folder.py +629 -0
  92. megadetector/postprocessing/detector_calibration.py +570 -0
  93. megadetector/postprocessing/generate_csv_report.py +522 -0
  94. megadetector/postprocessing/load_api_results.py +223 -0
  95. megadetector/postprocessing/md_to_coco.py +428 -0
  96. megadetector/postprocessing/md_to_labelme.py +351 -0
  97. megadetector/postprocessing/md_to_wi.py +41 -0
  98. megadetector/postprocessing/merge_detections.py +392 -0
  99. megadetector/postprocessing/postprocess_batch_results.py +2077 -0
  100. megadetector/postprocessing/remap_detection_categories.py +226 -0
  101. megadetector/postprocessing/render_detection_confusion_matrix.py +677 -0
  102. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +206 -0
  103. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +82 -0
  104. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +1665 -0
  105. megadetector/postprocessing/separate_detections_into_folders.py +795 -0
  106. megadetector/postprocessing/subset_json_detector_output.py +964 -0
  107. megadetector/postprocessing/top_folders_to_bottom.py +238 -0
  108. megadetector/postprocessing/validate_batch_results.py +332 -0
  109. megadetector/taxonomy_mapping/__init__.py +0 -0
  110. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +491 -0
  111. megadetector/taxonomy_mapping/map_new_lila_datasets.py +213 -0
  112. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +165 -0
  113. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +543 -0
  114. megadetector/taxonomy_mapping/retrieve_sample_image.py +71 -0
  115. megadetector/taxonomy_mapping/simple_image_download.py +224 -0
  116. megadetector/taxonomy_mapping/species_lookup.py +1008 -0
  117. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +159 -0
  118. megadetector/taxonomy_mapping/taxonomy_graph.py +346 -0
  119. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +83 -0
  120. megadetector/tests/__init__.py +0 -0
  121. megadetector/tests/test_nms_synthetic.py +335 -0
  122. megadetector/utils/__init__.py +0 -0
  123. megadetector/utils/ct_utils.py +1857 -0
  124. megadetector/utils/directory_listing.py +199 -0
  125. megadetector/utils/extract_frames_from_video.py +307 -0
  126. megadetector/utils/gpu_test.py +125 -0
  127. megadetector/utils/md_tests.py +2072 -0
  128. megadetector/utils/path_utils.py +2832 -0
  129. megadetector/utils/process_utils.py +172 -0
  130. megadetector/utils/split_locations_into_train_val.py +237 -0
  131. megadetector/utils/string_utils.py +234 -0
  132. megadetector/utils/url_utils.py +825 -0
  133. megadetector/utils/wi_platform_utils.py +968 -0
  134. megadetector/utils/wi_taxonomy_utils.py +1759 -0
  135. megadetector/utils/write_html_image_list.py +239 -0
  136. megadetector/visualization/__init__.py +0 -0
  137. megadetector/visualization/plot_utils.py +309 -0
  138. megadetector/visualization/render_images_with_thumbnails.py +243 -0
  139. megadetector/visualization/visualization_utils.py +1940 -0
  140. megadetector/visualization/visualize_db.py +630 -0
  141. megadetector/visualization/visualize_detector_output.py +479 -0
  142. megadetector/visualization/visualize_video_output.py +705 -0
  143. megadetector-10.0.13.dist-info/METADATA +134 -0
  144. megadetector-10.0.13.dist-info/RECORD +147 -0
  145. megadetector-10.0.13.dist-info/WHEEL +5 -0
  146. megadetector-10.0.13.dist-info/licenses/LICENSE +19 -0
  147. megadetector-10.0.13.dist-info/top_level.txt +1 -0
@@ -0,0 +1,702 @@
1
+ """
2
+
3
+ coco_to_yolo.py
4
+
5
+ Converts a COCO-formatted dataset to a YOLO-formatted dataset, flattening
6
+ the dataset (to a single folder) in the process.
7
+
8
+ If the input and output folders are the same, writes .txt files to the input folder,
9
+ and neither moves nor modifies images.
10
+
11
+ Currently ignores segmentation masks, and errors if an annotation has a
12
+ segmentation polygon but no bbox.
13
+
14
+ Has only been tested on a handful of COCO Camera Traps data sets; if you
15
+ use it for more general COCO conversion, YMMV.
16
+
17
+ """
18
+
19
+ #%% Imports and constants
20
+
21
+ import json
22
+ import os
23
+ import shutil
24
+ import sys
25
+ import argparse
26
+
27
+ from collections import defaultdict
28
+ from tqdm import tqdm
29
+
30
+ from megadetector.utils.path_utils import safe_create_link,find_images
31
+
32
+
33
+ #%% Support functions
34
+
35
def write_yolo_dataset_file(yolo_dataset_file,
                            dataset_base_dir,
                            class_list,
                            train_folder_relative=None,
                            val_folder_relative=None,
                            test_folder_relative=None):
    """
    Writes a YOLOv5-style dataset.yaml file to [yolo_dataset_file].

    The file contains a "path" entry, optional "train"/"val"/"test" entries, and a
    "names" mapping from zero-based class index to class name.

    Args:
        yolo_dataset_file (str): the file to write, typically ending in .yaml or .yml
            (any other extension only produces a warning).  Does not have to be
            within dataset_base_dir.
        dataset_base_dir (str): the absolute base path of the YOLO dataset, written
            verbatim as the "path" entry
        class_list (list or str): an ordered list of class names (the first item is
            class 0, etc.), or the name of a text file containing one class name per
            line, starting from class zero.  Blank lines in that file are ignored.
        train_folder_relative (str, optional): train folder name, used only to
            populate dataset.yaml.  Can also be a filename (e.g. a .txt file listing
            image files).  Omitted from the output if None.
        val_folder_relative (str, optional): val folder name, used only to
            populate dataset.yaml.  Can also be a filename (e.g. a .txt file listing
            image files).  Omitted from the output if None.
        test_folder_relative (str, optional): test folder name, used only to
            populate dataset.yaml.  Can also be a filename (e.g. a .txt file listing
            image files).  Omitted from the output if None.
    """

    # If we were given a filename rather than a list, load class names from the file,
    # trimming whitespace and dropping blank lines
    if isinstance(class_list,str):
        with open(class_list,'r') as class_f:
            stripped_names = [line.strip() for line in class_f.readlines()]
        class_list = [name for name in stripped_names if name]

    # An unexpected extension is worth a warning, but it's not an error
    if not yolo_dataset_file.endswith(('.yml','.yaml')):
        print('Warning: writing dataset file to a non-yml/yaml extension:\n{}'.format(
            yolo_dataset_file))

    # Only splits with a non-None folder get an entry in the output
    split_entries = (('train',train_folder_relative),
                     ('val',val_folder_relative),
                     ('test',test_folder_relative))

    # Write dataset.yaml
    with open(yolo_dataset_file,'w') as yaml_f:

        output_lines = ['# Train/val sets\n',
                        'path: {}\n'.format(dataset_base_dir)]

        for split_name,split_folder in split_entries:
            if split_folder is not None:
                output_lines.append('{}: {}\n'.format(split_name,split_folder))

        output_lines.append('\n')
        output_lines.append('# Classes\n')
        output_lines.append('names:\n')

        for class_index,class_name in enumerate(class_list):
            output_lines.append(' {}: {}\n'.format(class_index,class_name))

        yaml_f.writelines(output_lines)

# ...def write_yolo_dataset_file(...)
94
+
95
+
96
def _annotation_has_bbox(ann):
    """
    Returns True if the COCO annotation dict [ann] has a non-None, non-empty 'bbox' field.
    """

    return ('bbox' in ann) and (ann['bbox'] is not None) and (len(ann['bbox']) > 0)


def _coco_bbox_to_yolo_box(coco_bbox, img_w, img_h, clip_boxes=False):
    """
    Converts a COCO box to normalized YOLO box coordinates.

    Args:
        coco_bbox (list): [x_min, y_min, width, height] in absolute coordinates
        img_w (int or float): image width, in the same units as [coco_bbox]
        img_h (int or float): image height, in the same units as [coco_bbox]
        clip_boxes (bool, optional): whether to shrink boxes that extend past the
            image edges so they fit within the range [0,1]

    Returns:
        tuple: (x_center, y_center, width, height, clipped), where the first four
        values are normalized by the image size, and [clipped] is True if clipping
        was requested and changed the box.
    """

    x_min_absolute = coco_bbox[0]
    y_min_absolute = coco_bbox[1]
    box_w_absolute = coco_bbox[2]
    box_h_absolute = coco_bbox[3]

    x_center_absolute = (x_min_absolute + (x_min_absolute + box_w_absolute)) / 2
    y_center_absolute = (y_min_absolute + (y_min_absolute + box_h_absolute)) / 2

    x_center_relative = x_center_absolute / img_w
    y_center_relative = y_center_absolute / img_h

    box_w_relative = box_w_absolute / img_w
    box_h_relative = box_h_absolute / img_h

    clipped_box = False

    if clip_boxes:

        # For each edge that hangs over the image boundary, shrink the box by the
        # overhang and move the center by half the overhang, so the opposite edge
        # stays where it was.

        box_right = x_center_relative + (box_w_relative / 2.0)
        if box_right > 1.0:
            clipped_box = True
            overhang = box_right - 1.0
            box_w_relative -= overhang
            x_center_relative -= (overhang / 2.0)

        box_bottom = y_center_relative + (box_h_relative / 2.0)
        if box_bottom > 1.0:
            clipped_box = True
            overhang = box_bottom - 1.0
            box_h_relative -= overhang
            y_center_relative -= (overhang / 2.0)

        box_left = x_center_relative - (box_w_relative / 2.0)
        if box_left < 0.0:
            clipped_box = True
            overhang = abs(box_left)
            box_w_relative -= overhang
            x_center_relative += (overhang / 2.0)

        box_top = y_center_relative - (box_h_relative / 2.0)
        if box_top < 0.0:
            clipped_box = True
            overhang = abs(box_top)
            box_h_relative -= overhang
            y_center_relative += (overhang / 2.0)

    return (x_center_relative, y_center_relative,
            box_w_relative, box_h_relative, clipped_box)


def coco_to_yolo(input_image_folder,
                 output_folder,
                 input_file,
                 source_format='coco',
                 overwrite_images=False,
                 create_image_and_label_folders=False,
                 class_file_name='classes.txt',
                 allow_empty_annotations=False,
                 clip_boxes=False,
                 image_id_to_output_image_json_file=None,
                 images_to_exclude=None,
                 path_replacement_char='#',
                 category_names_to_exclude=None,
                 category_names_to_include=None,
                 write_output=True,
                 flatten_paths=False):
    """
    Converts a COCO-formatted dataset to a YOLO-formatted dataset, optionally flattening the
    dataset to a single folder in the process.

    If the input and output folders are the same (and neither [create_image_and_label_folders]
    nor [flatten_paths] is set), writes .txt files to the input folder, and neither moves nor
    modifies images.

    Currently ignores segmentation masks, and errors if an annotation has a
    segmentation polygon but no bbox.

    Args:
        input_image_folder (str): the folder where images live; filenames in the COCO .json
            file [input_file] should be relative to this folder
        output_folder (str): the base folder for the YOLO dataset; can be the same as
            [input_image_folder] (or None, which is treated the same way), in which case
            images are left alone unless [create_image_and_label_folders] or [flatten_paths]
            is set
        input_file (str): a .json file in COCO format
        source_format (str, optional): can be 'coco' (default) or 'coco_camera_traps'.  The only
            difference is that when source_format is 'coco_camera_traps', we treat an image with
            a non-bbox annotation as a special case, i.e. that's how an empty image is indicated.
            The original COCO standard is a little ambiguous on this issue.  If source_format is
            'coco', we either skip such annotations or error, depending on the value of
            [allow_empty_annotations].  [allow_empty_annotations] has no effect if source_format
            is 'coco_camera_traps'.
        overwrite_images (bool, optional): over-write images in the output folder if they exist
        create_image_and_label_folders (bool, optional): whether to create separate folders called
            'images' and 'labels' in the YOLO output folder.  If this is False, each image and its
            .txt file are written side by side, relative to [output_folder].  If this is True,
            a/b/c/image001.jpg is written to images/a/b/c/image001.jpg, and the corresponding
            text file is written to labels/a/b/c/image001.txt.  Path separators are only replaced
            if [flatten_paths] is True.
        class_file_name (str, optional): .txt file (relative to the output folder) that we should
            populate with a list of classes (or None to omit)
        allow_empty_annotations (bool, optional): if this is False and [source_format] is 'coco',
            we'll error on annotations that have no 'bbox' field
        clip_boxes (bool, optional): whether to clip bounding box coordinates to the range [0,1]
            when converting to YOLO xywh format
        image_id_to_output_image_json_file (str, optional): an optional *output* file, to which we
            will write a mapping from image IDs to output image names (relative, without
            extensions)
        images_to_exclude (list, optional): a list of image files (relative paths in the input
            folder) that we should ignore
        path_replacement_char (str, optional): only relevant if [flatten_paths] is True; this is
            used to replace path separators, e.g. if [path_replacement_char] is '#' and
            [flatten_paths] is True, the path portion of a/b/c/d.jpg becomes a#b#c#d
        category_names_to_exclude (list or set of str, optional): category names that should not
            be represented in the YOLO output; only impacts annotations, does not prevent copying
            images.  Mutually exclusive with [category_names_to_include].
        category_names_to_include (list or set of str, optional): allow-list of category names that
            should be represented in the YOLO output; only impacts annotations, does not prevent
            copying images.  Mutually exclusive with [category_names_to_exclude].
        write_output (bool, optional): determines whether we actually copy images and write
            annotations; setting this to False mostly puts this function in "dry run" mode.  The
            class list file (and the image ID mapping file, if requested) is written regardless
            of the value of write_output.
        flatten_paths (bool, optional): replace path separators in image filenames with
            [path_replacement_char], which ensures that the output folder is a single flat
            folder.  A zero-padded (six-digit) image index is also appended to each flattened
            name, e.g. a/b/c/d.jpg might become a#b#c#d_000012.jpg.

    Returns:
        dict: information about the coco --> yolo mapping, containing the fields:

        - class_list_filename: the filename to which we wrote the flat list of class names
          required by the YOLO format (None if [class_file_name] is None)
        - source_image_to_dest_image: a dict mapping source images to destination images
        - coco_id_to_yolo_id: a dict mapping COCO category IDs to YOLO category IDs
        - label_files_written: a list of the .txt files that were (or, if [write_output] is
          False, would have been) written
        - n_boxes_written: the total number of boxes in those .txt files

    Raises:
        ValueError: if both category filters are supplied, if an annotation has segmentation
            data but no bbox, if an empty annotation is found where that's not allowed, or
            if [source_format] is not recognized
        AssertionError: if the input folder or file doesn't exist, or if image IDs, category
            IDs, or output image names collide
    """

    ## Validate input

    if category_names_to_include is not None and category_names_to_exclude is not None:
        raise ValueError('category_names_to_include and category_names_to_exclude are mutually exclusive')

    if output_folder is None:
        output_folder = input_image_folder

    if images_to_exclude is not None:
        images_to_exclude = set(images_to_exclude)

    if category_names_to_exclude is None:
        category_names_to_exclude = {}

    assert os.path.isdir(input_image_folder)
    assert os.path.isfile(input_file)
    os.makedirs(output_folder,exist_ok=True)

    # Both folders exist at this point, so we can compare them as filesystem objects
    # rather than as strings.  A plain string comparison treats (e.g.) "data" and "data/"
    # as different folders, which would leave overwrite_images enabled and make
    # shutil.copyfile() fail when asked to copy an image onto itself.
    same_folder = os.path.samefile(output_folder,input_image_folder)

    if same_folder and (overwrite_images) and \
       (not create_image_and_label_folders) and (not flatten_paths):
        print('Warning: output folder and input folder are the same, disabling overwrite_images')
        overwrite_images = False


    ## Read input data

    with open(input_file,'r') as f:
        data = json.load(f)


    ## Parse annotations

    image_id_to_annotations = defaultdict(list)

    # ann = data['annotations'][0]
    for ann in data['annotations']:

        # Make sure no annotations have *only* segmentation data
        has_segmentation = ('segmentation' in ann) and \
            (ann['segmentation'] is not None) and \
            (len(ann['segmentation']) > 0)

        if has_segmentation and (not _annotation_has_bbox(ann)):
            raise ValueError('Oops: segmentation data present without bbox information, ' + \
                             'this script isn\'t ready for this dataset')

        image_id_to_annotations[ann['image_id']].append(ann)

    print('Parsed annotations for {} images'.format(len(image_id_to_annotations)))

    # Re-map class IDs to make sure they run from 0...n-classes-1
    #
    # Note: this allows unused categories in the output data set.  This is OK for
    # some training pipelines, not for others.
    next_category_id = 0
    coco_id_to_yolo_id = {}
    yolo_id_to_name = {}
    coco_category_ids_to_exclude = set()

    for category in data['categories']:

        category_name = category['name']

        if (category_names_to_include is not None) and \
           (category_name not in category_names_to_include):
            coco_category_ids_to_exclude.add(category['id'])
            continue
        elif (category_name in category_names_to_exclude):
            coco_category_ids_to_exclude.add(category['id'])
            continue

        assert category['id'] not in coco_id_to_yolo_id
        coco_id_to_yolo_id[category['id']] = next_category_id
        yolo_id_to_name[next_category_id] = category_name
        next_category_id += 1


    ## Process images (everything but I/O)

    # List of dictionaries with keys 'source_image','dest_image_relative',
    # 'dest_txt_relative','bboxes'
    images_to_copy = []

    missing_images = []
    excluded_images = []

    image_names = set()

    typical_image_extensions = {'.jpg','.jpeg','.png','.gif','.tif','.bmp'}

    printed_empty_annotation_warning = False

    image_id_to_output_image_name = {}

    print('Processing annotations')

    n_clipped_boxes = 0
    n_total_boxes = 0

    # i_image = 0; im = data['images'][i_image]
    for i_image,im in tqdm(enumerate(data['images']),total=len(data['images'])):

        output_info = {}
        source_image = os.path.join(input_image_folder,im['file_name'])
        output_info['source_image'] = source_image

        if images_to_exclude is not None and im['file_name'] in images_to_exclude:
            excluded_images.append(im['file_name'])
            continue

        tokens = os.path.splitext(im['file_name'])
        if tokens[1].lower() not in typical_image_extensions:
            print('Warning: unusual image file name {}'.format(im['file_name']))

        if flatten_paths:
            # The image index is appended to the flattened name
            image_name = tokens[0].replace('\\','/').replace('/',path_replacement_char) + \
                '_' + str(i_image).zfill(6)
        else:
            image_name = tokens[0]

        assert image_name not in image_names, 'Image name collision for {}'.format(image_name)
        image_names.add(image_name)

        assert im['id'] not in image_id_to_output_image_name
        image_id_to_output_image_name[im['id']] = image_name

        dest_image_relative = image_name + tokens[1]
        output_info['dest_image_relative'] = dest_image_relative
        dest_txt_relative = image_name + '.txt'
        output_info['dest_txt_relative'] = dest_txt_relative
        output_info['bboxes'] = []

        # Missing images are reported and skipped, rather than treated as errors
        if not os.path.isfile(source_image):
            print('Warning: could not find image {}'.format(source_image))
            missing_images.append(im['file_name'])
            continue

        image_id = im['id']

        image_bboxes = []

        if image_id in image_id_to_annotations:

            for ann in image_id_to_annotations[image_id]:

                # If this annotation has no bounding boxes...
                if not _annotation_has_bbox(ann):

                    if source_format == 'coco':

                        if not allow_empty_annotations:
                            # This is not entirely clear from the COCO spec, but it seems to be
                            # consensus that if you want to specify an image with no objects,
                            # you don't include any annotations for that image.
                            raise ValueError('If an annotation exists, it should have content')
                        else:
                            continue

                    elif source_format == 'coco_camera_traps':

                        # We allow empty bbox lists in COCO camera traps files; this is
                        # typically a negative example in a dataset that has bounding boxes,
                        # and category 0 is typically the empty category.
                        if ann['category_id'] != 0:
                            if not printed_empty_annotation_warning:
                                printed_empty_annotation_warning = True
                                print('Warning: non-bbox annotation found with category {}'.format(
                                    ann['category_id']))
                        continue

                    else:

                        raise ValueError('Unrecognized COCO variant: {}'.format(source_format))

                # ...if this is an empty annotation

                coco_bbox = ann['bbox']

                # This category isn't in our category list.  This typically corresponds to
                # whole sets of images that were excluded from the YOLO set.
                if ann['category_id'] in coco_category_ids_to_exclude:
                    continue

                yolo_category_id = coco_id_to_yolo_id[ann['category_id']]

                # COCO: [x_min, y_min, width, height] in absolute coordinates
                # YOLO: [class, x_center, y_center, width, height] in normalized coordinates

                img_w = im['width']
                img_h = im['height']

                if source_format not in ('coco','coco_camera_traps'):
                    raise ValueError('Unrecognized source format {}'.format(source_format))

                # Convert from COCO coordinates to YOLO coordinates
                x_center_relative, y_center_relative, \
                    box_w_relative, box_h_relative, clipped_box = \
                    _coco_bbox_to_yolo_box(coco_bbox,img_w,img_h,clip_boxes=clip_boxes)

                if clipped_box:
                    n_clipped_boxes += 1

                yolo_box = [yolo_category_id,
                            x_center_relative, y_center_relative,
                            box_w_relative, box_h_relative]

                image_bboxes.append(yolo_box)
                n_total_boxes += 1

            # ...for each annotation

        # ...if this image has annotations

        output_info['bboxes'] = image_bboxes

        images_to_copy.append(output_info)

    # ...for each image

    print('\nWriting {} boxes ({} clipped) for {} images'.format(n_total_boxes,
          n_clipped_boxes,len(images_to_copy)))
    print('{} missing images (of {})'.format(len(missing_images),len(data['images'])))

    if images_to_exclude is not None:
        print('{} excluded images (of {})'.format(len(excluded_images),len(data['images'])))


    ## Write output

    print('Generating class list')

    if class_file_name is not None:
        class_list_filename = os.path.join(output_folder,class_file_name)
        with open(class_list_filename, 'w') as f:
            print('Writing class list to {}'.format(class_list_filename))
            for i_class in range(0,len(yolo_id_to_name)):
                # Category IDs should range from 0..N-1
                assert i_class in yolo_id_to_name
                f.write(yolo_id_to_name[i_class] + '\n')
    else:
        class_list_filename = None

    if image_id_to_output_image_json_file is not None:
        print('Writing image ID mapping to {}'.format(image_id_to_output_image_json_file))
        with open(image_id_to_output_image_json_file,'w') as f:
            json.dump(image_id_to_output_image_name,f,indent=1)

    if same_folder and (not create_image_and_label_folders):
        print('Creating annotation files (not copying images, input and output folder are the same)')
    else:
        print('Copying images and creating annotation files')

    if create_image_and_label_folders:
        dest_image_folder = os.path.join(output_folder,'images')
        dest_txt_folder = os.path.join(output_folder,'labels')
    else:
        dest_image_folder = output_folder
        dest_txt_folder = output_folder

    source_image_to_dest_image = {}

    label_files_written = []
    n_boxes_written = 0

    # TODO: parallelize this loop
    #
    # output_info = images_to_copy[0]
    for output_info in tqdm(images_to_copy):

        source_image = output_info['source_image']
        dest_image_relative = output_info['dest_image_relative']
        dest_txt_relative = output_info['dest_txt_relative']

        dest_image = os.path.join(dest_image_folder,dest_image_relative)
        dest_txt = os.path.join(dest_txt_folder,dest_txt_relative)

        source_image_to_dest_image[source_image] = dest_image

        # Copy the image if necessary
        if write_output:

            os.makedirs(os.path.dirname(dest_image),exist_ok=True)
            os.makedirs(os.path.dirname(dest_txt),exist_ok=True)

            if not create_image_and_label_folders:
                assert os.path.dirname(dest_image) == os.path.dirname(dest_txt)

            if (not os.path.isfile(dest_image)) or (overwrite_images):
                shutil.copyfile(source_image,dest_image)

        bboxes = output_info['bboxes']

        # Write the annotation file if necessary
        #
        # Only write an annotation file if there are bounding boxes.  Images with
        # no .txt files are treated as hard negatives, at least by YOLOv5:
        #
        # https://github.com/ultralytics/yolov5/issues/3218
        #
        # I think this is also true for images with empty .txt files, but
        # I'm using the convention suggested on that issue, i.e. hard
        # negatives are expressed as images without .txt files.
        if len(bboxes) > 0:

            n_boxes_written += len(bboxes)
            label_files_written.append(dest_txt)

            if write_output:

                with open(dest_txt,'w') as f:

                    # bbox = bboxes[0]
                    for bbox in bboxes:
                        assert len(bbox) == 5
                        s = '{} {} {} {} {}'.format(bbox[0],bbox[1],bbox[2],bbox[3],bbox[4])
                        f.write(s + '\n')

    # ...for each image

    coco_to_yolo_info = {}
    coco_to_yolo_info['class_list_filename'] = class_list_filename
    coco_to_yolo_info['source_image_to_dest_image'] = source_image_to_dest_image
    coco_to_yolo_info['coco_id_to_yolo_id'] = coco_id_to_yolo_id
    coco_to_yolo_info['label_files_written'] = label_files_written
    coco_to_yolo_info['n_boxes_written'] = n_boxes_written

    return coco_to_yolo_info

# ...def coco_to_yolo(...)
548
+
549
+
550
def create_yolo_symlinks(source_folder,
                         images_folder,
                         labels_folder,
                         class_list_file=None,
                         class_list_output_name='object.data',
                         force_lowercase_image_extension=False):
    """
    Given a YOLO-formatted folder of images and .txt files, creates a folder
    of symlinks to all the images, and a folder of symlinks to all the labels.
    Used to support preview/editing tools that assume images and labels are in separate
    folders.

    If [images_folder] is located inside [source_folder] (e.g. <source_folder>/images),
    files that are already inside [images_folder] are not treated as source images, so
    calling this function repeatedly does not create nested links to previously-created
    links.

    Args:
        source_folder (str): input folder
        images_folder (str): output folder with links to images
        labels_folder (str): output folder with links to labels
        class_list_file (str, optional): path to the class list file (e.g. classes.txt); if
            provided, a link to this file is created in [labels_folder]
        class_list_output_name (str, optional): name of the class list link created in
            [labels_folder]
        force_lowercase_image_extension (bool, optional): create symlinks with, e.g., .jpg,
            even if the input image is, e.g., .JPG

    :meta private:
    """

    assert source_folder != images_folder and source_folder != labels_folder

    os.makedirs(images_folder,exist_ok=True)
    os.makedirs(labels_folder,exist_ok=True)

    # Normalized folder prefixes (absolute, case-normalized, with a trailing separator),
    # used to detect source files that actually live inside the images folder.
    source_prefix = os.path.join(os.path.normcase(os.path.abspath(source_folder)),'')
    images_prefix = os.path.join(os.path.normcase(os.path.abspath(images_folder)),'')

    # Only filter when the images folder is strictly inside the source folder; in every
    # other layout, the recursive enumeration below can't reach the images folder.
    images_folder_is_nested = \
        (images_prefix != source_prefix) and images_prefix.startswith(source_prefix)

    image_files_relative = find_images(source_folder,recursive=True,return_relative_paths=True)

    # image_fn_relative = image_files_relative[0]
    for image_fn_relative in tqdm(image_files_relative):

        source_file_abs = os.path.join(source_folder,image_fn_relative)

        # Skip links created by a previous run; otherwise each run would re-link them one
        # level deeper (images/images/...).
        if images_folder_is_nested:
            source_file_normalized = os.path.normcase(os.path.abspath(source_file_abs))
            if source_file_normalized.startswith(images_prefix):
                continue

        target_file_abs = os.path.join(images_folder,image_fn_relative)

        if force_lowercase_image_extension:
            tokens = os.path.splitext(target_file_abs)
            target_file_abs = tokens[0] + tokens[1].lower()

        # Relative paths are preserved in the output, so create subfolders as needed
        os.makedirs(os.path.dirname(target_file_abs),exist_ok=True)
        safe_create_link(source_file_abs,target_file_abs)

        # Images without a matching .txt file get no link in the labels folder
        source_annotation_file_abs = os.path.splitext(source_file_abs)[0] + '.txt'
        if os.path.isfile(source_annotation_file_abs):
            target_annotation_file_abs = \
                os.path.splitext(os.path.join(labels_folder,image_fn_relative))[0] + '.txt'
            os.makedirs(os.path.dirname(target_annotation_file_abs),exist_ok=True)
            safe_create_link(source_annotation_file_abs,target_annotation_file_abs)

    # ...for each image

    if class_list_file is not None:
        target_class_list_file = os.path.join(labels_folder,class_list_output_name)
        safe_create_link(class_list_file,target_class_list_file)

# ...def create_yolo_symlinks(...)
607
+
608
+
609
+ #%% Interactive driver
610
+
611
if False:

    # This block never runs; it only hosts cells for interactive (cell-by-cell) execution.
    pass

    #%% Options

    # Input COCO file, source image folder, and destination folder for the YOLO dataset
    input_file = os.path.expanduser('~/data/md-test-coco.json')
    image_folder = os.path.expanduser('~/data/md-test')
    output_folder = os.path.expanduser('~/data/md-test-yolo')

    # Conversion options
    create_image_and_label_folders = False
    class_file_name = 'classes.txt'
    allow_empty_annotations = False
    clip_boxes = False
    image_id_to_output_image_json_file = None
    images_to_exclude = None
    path_replacement_char = '#'
    category_names_to_exclude = None


    #%% Programmatic execution

    coco_to_yolo_results = coco_to_yolo(
        image_folder,
        output_folder,
        input_file,
        source_format='coco',
        overwrite_images=False,
        create_image_and_label_folders=create_image_and_label_folders,
        class_file_name=class_file_name,
        allow_empty_annotations=allow_empty_annotations,
        clip_boxes=clip_boxes)

    # Expose the converted dataset as separate "images" and "labels" folders
    create_yolo_symlinks(
        source_folder=output_folder,
        images_folder=output_folder + '/images',
        labels_folder=output_folder + '/labels',
        class_list_file=coco_to_yolo_results['class_list_filename'],
        class_list_output_name='object.data',
        force_lowercase_image_extension=True)


    #%% Prepare command-line example

    s = 'python coco_to_yolo.py {} {} {} --create_bounding_box_editor_symlinks'.format(
        image_folder,output_folder,input_file)
    print(s)

    # Copy the command to the clipboard for pasting into a terminal
    import clipboard
    clipboard.copy(s)
654
+
655
+
656
+ #%% Command-line driver
657
+
658
def main(): # noqa
    """
    Command-line driver: converts a COCO-formatted dataset to YOLO format via
    coco_to_yolo(), optionally creating "images" and "labels" symlink folders inside
    the output folder via create_yolo_symlinks().

    Prints help and exits if no command-line arguments are supplied.
    """

    parser = argparse.ArgumentParser(
        description='Convert COCO-formatted data to YOLO format, flattening the image structure')

    # The three positional arguments are passed, in order, to coco_to_yolo()

    parser.add_argument(
        'input_folder',
        type=str,
        help='Path to input images')

    parser.add_argument(
        'output_folder',
        type=str,
        help='Path to flat, YOLO-formatted dataset')

    parser.add_argument(
        'input_file',
        type=str,
        help='Path to COCO dataset file (.json)')

    parser.add_argument(
        '--create_bounding_box_editor_symlinks',
        action='store_true',
        help='Prepare symlinks so the whole folder appears to contain "images" and "labels" folders')

    # With no arguments at all, show the full help text rather than a terse argparse error
    if len(sys.argv[1:]) == 0:
        parser.print_help()
        parser.exit()

    args = parser.parse_args()

    coco_to_yolo_results = coco_to_yolo(args.input_folder,args.output_folder,args.input_file)

    if args.create_bounding_box_editor_symlinks:
        create_yolo_symlinks(source_folder=args.output_folder,
                             images_folder=args.output_folder + '/images',
                             labels_folder=args.output_folder + '/labels',
                             class_list_file=coco_to_yolo_results['class_list_filename'],
                             class_list_output_name='object.data',
                             force_lowercase_image_extension=True)

if __name__ == '__main__':
    main()