megadetector 10.0.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (147) hide show
  1. megadetector/__init__.py +0 -0
  2. megadetector/api/__init__.py +0 -0
  3. megadetector/api/batch_processing/integration/digiKam/setup.py +6 -0
  4. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +465 -0
  5. megadetector/api/batch_processing/integration/eMammal/test_scripts/config_template.py +5 -0
  6. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +125 -0
  7. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +55 -0
  8. megadetector/classification/__init__.py +0 -0
  9. megadetector/classification/aggregate_classifier_probs.py +108 -0
  10. megadetector/classification/analyze_failed_images.py +227 -0
  11. megadetector/classification/cache_batchapi_outputs.py +198 -0
  12. megadetector/classification/create_classification_dataset.py +626 -0
  13. megadetector/classification/crop_detections.py +516 -0
  14. megadetector/classification/csv_to_json.py +226 -0
  15. megadetector/classification/detect_and_crop.py +853 -0
  16. megadetector/classification/efficientnet/__init__.py +9 -0
  17. megadetector/classification/efficientnet/model.py +415 -0
  18. megadetector/classification/efficientnet/utils.py +608 -0
  19. megadetector/classification/evaluate_model.py +520 -0
  20. megadetector/classification/identify_mislabeled_candidates.py +152 -0
  21. megadetector/classification/json_to_azcopy_list.py +63 -0
  22. megadetector/classification/json_validator.py +696 -0
  23. megadetector/classification/map_classification_categories.py +276 -0
  24. megadetector/classification/merge_classification_detection_output.py +509 -0
  25. megadetector/classification/prepare_classification_script.py +194 -0
  26. megadetector/classification/prepare_classification_script_mc.py +228 -0
  27. megadetector/classification/run_classifier.py +287 -0
  28. megadetector/classification/save_mislabeled.py +110 -0
  29. megadetector/classification/train_classifier.py +827 -0
  30. megadetector/classification/train_classifier_tf.py +725 -0
  31. megadetector/classification/train_utils.py +323 -0
  32. megadetector/data_management/__init__.py +0 -0
  33. megadetector/data_management/animl_to_md.py +161 -0
  34. megadetector/data_management/annotations/__init__.py +0 -0
  35. megadetector/data_management/annotations/annotation_constants.py +33 -0
  36. megadetector/data_management/camtrap_dp_to_coco.py +270 -0
  37. megadetector/data_management/cct_json_utils.py +566 -0
  38. megadetector/data_management/cct_to_md.py +184 -0
  39. megadetector/data_management/cct_to_wi.py +293 -0
  40. megadetector/data_management/coco_to_labelme.py +284 -0
  41. megadetector/data_management/coco_to_yolo.py +702 -0
  42. megadetector/data_management/databases/__init__.py +0 -0
  43. megadetector/data_management/databases/add_width_and_height_to_db.py +107 -0
  44. megadetector/data_management/databases/combine_coco_camera_traps_files.py +210 -0
  45. megadetector/data_management/databases/integrity_check_json_db.py +528 -0
  46. megadetector/data_management/databases/subset_json_db.py +195 -0
  47. megadetector/data_management/generate_crops_from_cct.py +200 -0
  48. megadetector/data_management/get_image_sizes.py +164 -0
  49. megadetector/data_management/labelme_to_coco.py +559 -0
  50. megadetector/data_management/labelme_to_yolo.py +349 -0
  51. megadetector/data_management/lila/__init__.py +0 -0
  52. megadetector/data_management/lila/create_lila_blank_set.py +556 -0
  53. megadetector/data_management/lila/create_lila_test_set.py +187 -0
  54. megadetector/data_management/lila/create_links_to_md_results_files.py +106 -0
  55. megadetector/data_management/lila/download_lila_subset.py +182 -0
  56. megadetector/data_management/lila/generate_lila_per_image_labels.py +777 -0
  57. megadetector/data_management/lila/get_lila_annotation_counts.py +174 -0
  58. megadetector/data_management/lila/get_lila_image_counts.py +112 -0
  59. megadetector/data_management/lila/lila_common.py +319 -0
  60. megadetector/data_management/lila/test_lila_metadata_urls.py +164 -0
  61. megadetector/data_management/mewc_to_md.py +344 -0
  62. megadetector/data_management/ocr_tools.py +873 -0
  63. megadetector/data_management/read_exif.py +964 -0
  64. megadetector/data_management/remap_coco_categories.py +195 -0
  65. megadetector/data_management/remove_exif.py +156 -0
  66. megadetector/data_management/rename_images.py +194 -0
  67. megadetector/data_management/resize_coco_dataset.py +663 -0
  68. megadetector/data_management/speciesnet_to_md.py +41 -0
  69. megadetector/data_management/wi_download_csv_to_coco.py +247 -0
  70. megadetector/data_management/yolo_output_to_md_output.py +594 -0
  71. megadetector/data_management/yolo_to_coco.py +876 -0
  72. megadetector/data_management/zamba_to_md.py +188 -0
  73. megadetector/detection/__init__.py +0 -0
  74. megadetector/detection/change_detection.py +840 -0
  75. megadetector/detection/process_video.py +479 -0
  76. megadetector/detection/pytorch_detector.py +1451 -0
  77. megadetector/detection/run_detector.py +1267 -0
  78. megadetector/detection/run_detector_batch.py +2159 -0
  79. megadetector/detection/run_inference_with_yolov5_val.py +1314 -0
  80. megadetector/detection/run_md_and_speciesnet.py +1494 -0
  81. megadetector/detection/run_tiled_inference.py +1038 -0
  82. megadetector/detection/tf_detector.py +209 -0
  83. megadetector/detection/video_utils.py +1379 -0
  84. megadetector/postprocessing/__init__.py +0 -0
  85. megadetector/postprocessing/add_max_conf.py +72 -0
  86. megadetector/postprocessing/categorize_detections_by_size.py +166 -0
  87. megadetector/postprocessing/classification_postprocessing.py +1752 -0
  88. megadetector/postprocessing/combine_batch_outputs.py +249 -0
  89. megadetector/postprocessing/compare_batch_results.py +2110 -0
  90. megadetector/postprocessing/convert_output_format.py +403 -0
  91. megadetector/postprocessing/create_crop_folder.py +629 -0
  92. megadetector/postprocessing/detector_calibration.py +570 -0
  93. megadetector/postprocessing/generate_csv_report.py +522 -0
  94. megadetector/postprocessing/load_api_results.py +223 -0
  95. megadetector/postprocessing/md_to_coco.py +428 -0
  96. megadetector/postprocessing/md_to_labelme.py +351 -0
  97. megadetector/postprocessing/md_to_wi.py +41 -0
  98. megadetector/postprocessing/merge_detections.py +392 -0
  99. megadetector/postprocessing/postprocess_batch_results.py +2077 -0
  100. megadetector/postprocessing/remap_detection_categories.py +226 -0
  101. megadetector/postprocessing/render_detection_confusion_matrix.py +677 -0
  102. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +206 -0
  103. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +82 -0
  104. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +1665 -0
  105. megadetector/postprocessing/separate_detections_into_folders.py +795 -0
  106. megadetector/postprocessing/subset_json_detector_output.py +964 -0
  107. megadetector/postprocessing/top_folders_to_bottom.py +238 -0
  108. megadetector/postprocessing/validate_batch_results.py +332 -0
  109. megadetector/taxonomy_mapping/__init__.py +0 -0
  110. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +491 -0
  111. megadetector/taxonomy_mapping/map_new_lila_datasets.py +213 -0
  112. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +165 -0
  113. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +543 -0
  114. megadetector/taxonomy_mapping/retrieve_sample_image.py +71 -0
  115. megadetector/taxonomy_mapping/simple_image_download.py +224 -0
  116. megadetector/taxonomy_mapping/species_lookup.py +1008 -0
  117. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +159 -0
  118. megadetector/taxonomy_mapping/taxonomy_graph.py +346 -0
  119. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +83 -0
  120. megadetector/tests/__init__.py +0 -0
  121. megadetector/tests/test_nms_synthetic.py +335 -0
  122. megadetector/utils/__init__.py +0 -0
  123. megadetector/utils/ct_utils.py +1857 -0
  124. megadetector/utils/directory_listing.py +199 -0
  125. megadetector/utils/extract_frames_from_video.py +307 -0
  126. megadetector/utils/gpu_test.py +125 -0
  127. megadetector/utils/md_tests.py +2072 -0
  128. megadetector/utils/path_utils.py +2832 -0
  129. megadetector/utils/process_utils.py +172 -0
  130. megadetector/utils/split_locations_into_train_val.py +237 -0
  131. megadetector/utils/string_utils.py +234 -0
  132. megadetector/utils/url_utils.py +825 -0
  133. megadetector/utils/wi_platform_utils.py +968 -0
  134. megadetector/utils/wi_taxonomy_utils.py +1759 -0
  135. megadetector/utils/write_html_image_list.py +239 -0
  136. megadetector/visualization/__init__.py +0 -0
  137. megadetector/visualization/plot_utils.py +309 -0
  138. megadetector/visualization/render_images_with_thumbnails.py +243 -0
  139. megadetector/visualization/visualization_utils.py +1940 -0
  140. megadetector/visualization/visualize_db.py +630 -0
  141. megadetector/visualization/visualize_detector_output.py +479 -0
  142. megadetector/visualization/visualize_video_output.py +705 -0
  143. megadetector-10.0.13.dist-info/METADATA +134 -0
  144. megadetector-10.0.13.dist-info/RECORD +147 -0
  145. megadetector-10.0.13.dist-info/WHEEL +5 -0
  146. megadetector-10.0.13.dist-info/licenses/LICENSE +19 -0
  147. megadetector-10.0.13.dist-info/top_level.txt +1 -0
@@ -0,0 +1,876 @@
1
+ """
2
+
3
+ yolo_to_coco.py
4
+
5
+ Converts a folder of YOLO-formatted annotation files to a COCO-formatted dataset.
6
+
7
+ """
8
+
9
+ #%% Imports and constants
10
+
11
+ import json
12
+ import os
13
+ import argparse
14
+ import sys
15
+
16
+ from multiprocessing.pool import ThreadPool
17
+ from multiprocessing.pool import Pool
18
+ from functools import partial
19
+
20
+ from tqdm import tqdm
21
+
22
+ from megadetector.utils.path_utils import find_images
23
+ from megadetector.utils.path_utils import recursive_file_list
24
+ from megadetector.utils.path_utils import find_image_strings
25
+ from megadetector.utils.ct_utils import invert_dictionary
26
+ from megadetector.utils.ct_utils import write_json
27
+ from megadetector.visualization.visualization_utils import open_image
28
+ from megadetector.data_management.yolo_output_to_md_output import \
29
+ read_classes_from_yolo_dataset_file
30
+
31
+
32
+ #%% Support functions
33
+
34
+ def _filename_to_image_id(fn):
35
+ """
36
+ Image IDs can't have spaces in them, replace spaces with underscores
37
+ """
38
+
39
+ return fn.replace(' ','_').replace('\\','/')
40
+
41
+
42
def _process_image(fn_abs,input_folder,category_id_to_name,label_folder):
    """
    Internal support function for processing one image's labels.

    Opens the image to read its size, looks for the corresponding YOLO label file
    (same base name, '.txt' or '.TXT' extension), and converts each label line to a
    COCO-style annotation.

    Args:
        fn_abs (str): path to the image file
        input_folder (str): the image folder; the image's 'file_name' and 'id' are
            computed relative to this folder
        category_id_to_name (dict): maps integer category IDs to names; every category
            ID in the label file must be a key in this dict
        label_folder (str): folder in which to look for label files, using the same
            relative paths as the images; if None, label files are expected next to
            the images

    Returns:
        tuple: (im,annotations_this_image).  [im] is a dict with keys 'file_name', 'id',
        'width', 'height', and 'error'.  [annotations_this_image] is a list of dicts with
        keys 'id', 'image_id', 'category_id', 'sequence_level_annotation', and 'bbox'
        ([x_min, y_min, width, height] in absolute coordinates).  If the image can't be
        opened, 'width' and 'height' are -1, 'error' is the exception text, and the
        annotation list is empty.
    """

    # Create the image object for this image
    #
    # Always use forward slashes in image filenames and IDs
    image_fn_relative_native = os.path.relpath(fn_abs,input_folder)
    image_fn_relative = image_fn_relative_native.replace('\\','/')
    image_id = _filename_to_image_id(image_fn_relative)

    # Image ID uniqueness is not checked here; the original notes that this check
    # is now done in a separate loop.

    im = {}
    im['file_name'] = image_fn_relative
    im['id'] = image_id

    annotations_this_image = []

    try:
        pil_im = open_image(fn_abs)
        im_width, im_height = pil_im.size
        im['width'] = im_width
        im['height'] = im_height
        im['error'] = None
    except Exception as e:
        print('Warning: error reading {}:\n{}'.format(image_fn_relative,str(e)))
        im['width'] = -1
        im['height'] = -1
        im['error'] = str(e)
        return (im,annotations_this_image)

    # Is there an annotation file for this image?
    if label_folder is not None:
        assert input_folder in fn_abs
        # Re-root the relative path under the label folder.  A plain string replace
        # would also rewrite any later occurrence of [input_folder] within the path.
        label_file_abs_base = os.path.join(label_folder,image_fn_relative_native)
    else:
        label_file_abs_base = fn_abs

    # Look for .txt first, then .TXT; both candidates live wherever the labels live
    label_file_abs_stem = os.path.splitext(label_file_abs_base)[0]
    annotation_file = label_file_abs_stem + '.txt'
    if not os.path.isfile(annotation_file):
        annotation_file = label_file_abs_stem + '.TXT'

    if os.path.isfile(annotation_file):

        with open(annotation_file,'r') as f:
            lines = [s.strip() for s in f]

        annotation_number = 0

        for s in lines:

            # Skip blank lines and comments, consistent with validate_label_file()
            if len(s) == 0 or s[0] == '#':
                continue

            tokens = s.split()
            assert len(tokens) == 5, \
                'Expected 5 tokens, found {} in annotation file {}'.format(
                    len(tokens),annotation_file)
            category_id = int(tokens[0])
            assert category_id in category_id_to_name, \
                'Unrecognized category ID {} in annotation file {}'.format(
                    category_id,annotation_file)
            ann = {}
            ann['id'] = im['id'] + '_' + str(annotation_number)
            ann['image_id'] = im['id']
            ann['category_id'] = category_id
            ann['sequence_level_annotation'] = False

            # COCO: [x_min, y_min, width, height] in absolute coordinates
            # YOLO: [class, x_center, y_center, width, height] in normalized coordinates

            yolo_bbox = [float(x) for x in tokens[1:]]

            normalized_x_center = yolo_bbox[0]
            normalized_y_center = yolo_bbox[1]
            normalized_width = yolo_bbox[2]
            normalized_height = yolo_bbox[3]

            absolute_x_center = normalized_x_center * im_width
            absolute_y_center = normalized_y_center * im_height
            absolute_width = normalized_width * im_width
            absolute_height = normalized_height * im_height
            absolute_x_min = absolute_x_center - absolute_width / 2
            absolute_y_min = absolute_y_center - absolute_height / 2

            coco_bbox = [absolute_x_min, absolute_y_min, absolute_width, absolute_height]

            ann['bbox'] = coco_bbox
            annotation_number += 1

            annotations_this_image.append(ann)

        # ...for each annotation

    # ...if this image has annotations

    return (im,annotations_this_image)
145
+
146
+ # ...def _process_image(...)
147
+
148
+
149
def load_yolo_class_list(class_name_file):
    """
    Loads a dictionary mapping zero-indexed IDs to class names from the text/yaml file
    [class_name_file].

    Args:
        class_name_file (str or list): this can be:
            - a .yml or .yaml file in YOLO's dataset.yaml format
            - a .txt or .data file containing a flat list of class names, one per line;
              blank lines are only allowed at the end of the file
            - a list of class names

    Returns:
        dict: A dict mapping zero-indexed integer IDs to class names

    Raises:
        AssertionError: if the file extension is not recognized, or a flat class list
            file is empty or starts with a blank line
        ValueError: if a flat class list file has a non-blank line after a blank line
    """

    # class_name_file can also be a list of class names
    if isinstance(class_name_file,list):
        return dict(enumerate(class_name_file))

    ext = os.path.splitext(class_name_file)[1][1:]
    assert ext in ('yml','txt','yaml','data'), 'Unrecognized class name file type {}'.format(
        class_name_file)

    if ext in ('yml','yaml'):
        return read_classes_from_yolo_dataset_file(class_name_file)

    with open(class_name_file,'r') as f:
        lines = [s.strip() for s in f]

    assert len(lines) > 0, 'Empty class name file {}'.format(class_name_file)
    assert len(lines[0]) > 0, 'Empty class name file {} (empty first line)'.format(class_name_file)

    # Blank lines are allowed at the end of the file; drop them so they don't turn
    # into (empty) class names.  The first line is non-blank, so this always leaves
    # at least one line.
    while len(lines[-1]) == 0:
        lines.pop()

    # Any blank line that remains is followed by a non-blank line
    if any(len(s) == 0 for s in lines):
        raise ValueError('Invalid class name file {}, non-blank line after the last blank line'.format(
            class_name_file))

    return dict(enumerate(lines))
203
+
204
+ # ...load_yolo_class_list(...)
205
+
206
+
207
def validate_label_file(label_file,category_id_to_name=None,verbose=False):
    """
    Verifies that [label_file] is a valid YOLO label file.  Does not check the extension.

    Blank lines and lines starting with '#' are ignored.  Every other line must have
    five whitespace-separated tokens: an integer category ID followed by four floats
    (normalized x_center, y_center, width, height), and the resulting box must lie
    within [0,1] on both axes.

    Args:
        label_file (str): the .txt file to validate
        category_id_to_name (dict, optional): a dict mapping integer category IDs to names;
            if this is not None, this function reports an error if the file uses a category
            that's not in this dict
        verbose (bool, optional): enable additional debug console output

    Returns:
        dict: a dict with keys 'file' (the same as [label_file]) and 'errors' (a list of
        errors (if any) that we found in this file)
    """

    label_result = {}
    label_result['file'] = label_file
    label_result['errors'] = []

    try:
        with open(label_file,'r') as f:
            lines = f.readlines()
    except Exception as e:
        label_result['errors'].append('Read error: {}'.format(str(e)))
        return label_result

    # Line indices reported in error messages are zero-based
    for i_line,line in enumerate(lines):

        s = line.strip()
        if len(s) == 0 or s[0] == '#':
            continue

        try:

            tokens = s.split()
            assert len(tokens) == 5, '{} tokens'.format(len(tokens))

            # Always parse the category ID, so a non-integer class is reported even
            # when no category mapping was supplied
            category_id = int(tokens[0])

            if category_id_to_name is not None:
                assert category_id in category_id_to_name, \
                    'Unrecognized category ID {}'.format(category_id)

            yolo_bbox = [float(x) for x in tokens[1:]]

        except Exception as e:
            label_result['errors'].append('Token error at line {}: {}'.format(i_line,str(e)))
            continue

        normalized_x_center = yolo_bbox[0]
        normalized_y_center = yolo_bbox[1]
        normalized_width = yolo_bbox[2]
        normalized_height = yolo_bbox[3]

        normalized_x_min = normalized_x_center - normalized_width / 2.0
        normalized_x_max = normalized_x_center + normalized_width / 2.0
        normalized_y_min = normalized_y_center - normalized_height / 2.0
        normalized_y_max = normalized_y_center + normalized_height / 2.0

        if normalized_x_min < 0 or normalized_y_min < 0 or \
            normalized_x_max > 1 or normalized_y_max > 1:
            label_result['errors'].append('Invalid bounding box: {} {} {} {}'.format(
                normalized_x_min,normalized_y_min,normalized_x_max,normalized_y_max))

    # ...for each line

    if verbose:
        if len(label_result['errors']) > 0:
            print('Errors for {}:'.format(label_file))
            for error in label_result['errors']:
                print(error)

    return label_result
280
+
281
+ # ...def validate_label_file(...)
282
+
283
+
284
def validate_yolo_dataset(input_folder,
                          class_name_file,
                          n_workers=1,
                          pool_type='thread',
                          verbose=False):
    """
    Checks every label file in a YOLO dataset folder.  Images and labels are expected
    to live in the same folder tree; separate image/label folders are not yet supported
    here (yolo_to_coco() supports this).

    Reports:

    * Image files without label files
    * Text files without image files
    * Illegal classes in label files
    * Invalid boxes in label files

    Args:
        input_folder (str): the YOLO dataset folder to validate
        class_name_file (str or list): a list of classes, a flat text file, or a yolo
            dataset.yml/.yaml file.  If it's a dataset.yml file, that file should point to
            input_folder as the base folder, though this is not explicitly checked.
        n_workers (int, optional): number of concurrent workers, set to <= 1 to disable
            parallelization
        pool_type (str, optional): 'thread' or 'process', worker type to use for
            parallelization; not used if [n_workers] <= 1
        verbose (bool, optional): enable additional debug console output

    Returns:
        dict: validation results, as a dict with fields:

        - image_files_without_label_files (list)
        - label_files_without_images (list)
        - label_results (list of dicts with fields 'file' and 'errors', one per label file)
    """

    # Validate arguments
    assert os.path.isdir(input_folder), 'Could not find input folder {}'.format(input_folder)
    use_pool = (n_workers > 1)
    if use_pool:
        assert pool_type in ('thread','process'), 'Illegal pool type {}'.format(pool_type)

    category_id_to_name = load_yolo_class_list(class_name_file)

    print('Enumerating files in {}'.format(input_folder))

    all_files = recursive_file_list(input_folder,
                                    recursive=True,
                                    return_relative_paths=False,
                                    convert_slashes=True)
    label_files = [fn for fn in all_files if fn.endswith('.txt')]
    image_files = find_image_strings(all_files)
    print('Found {} images files and {} label files in {}'.format(
        len(image_files),len(label_files),input_folder))

    label_files_set = set(label_files)

    # Two images that differ only in extension would map to the same label file
    image_stems = set()
    for image_fn in image_files:
        stem = os.path.splitext(image_fn)[0]
        assert stem not in image_stems, \
            'Duplicate image file, likely with different extensions: {}'.format(image_fn)
        image_stems.add(stem)

    print('Looking for missing image/label files')

    image_files_without_label_files = [
        image_fn for image_fn in tqdm(image_files)
        if (os.path.splitext(image_fn)[0] + '.txt') not in label_files_set
    ]

    label_files_without_images = [
        label_fn for label_fn in tqdm(label_files)
        if os.path.splitext(label_fn)[0] not in image_stems
    ]

    print('Found {} image files without labels, {} labels without images'.format(
        len(image_files_without_label_files),len(label_files_without_images)))

    print('Validating label files')

    if not use_pool:

        label_results = [
            validate_label_file(label_fn,
                                category_id_to_name=category_id_to_name,
                                verbose=verbose)
            for label_fn in tqdm(label_files)
        ]

    else:

        # pool_type was already validated above for the parallel case

        pool = None
        try:
            pool = ThreadPool(n_workers) if pool_type == 'thread' else Pool(n_workers)

            print('Starting a {} pool of {} workers'.format(pool_type,n_workers))

            validate_one_file = partial(validate_label_file,
                                        category_id_to_name=category_id_to_name,
                                        verbose=verbose)
            label_results = list(tqdm(pool.imap(validate_one_file, label_files),
                                      total=len(label_files)))
        finally:
            if pool is not None:
                pool.close()
                pool.join()
                print('Pool closed and joined for label file validation')

    assert len(label_results) == len(label_files)

    validation_results = {
        'image_files_without_label_files':image_files_without_label_files,
        'label_files_without_images':label_files_without_images,
        'label_results':label_results
    }

    return validation_results
404
+
405
+ # ...validate_yolo_dataset(...)
406
+
407
+
408
+ #%% Main conversion function
409
+
410
+ def yolo_to_coco(input_folder,
411
+ class_name_file,
412
+ output_file=None,
413
+ empty_image_handling='no_annotations',
414
+ empty_image_category_name='empty',
415
+ error_image_handling='no_annotations',
416
+ allow_images_without_label_files=True,
417
+ n_workers=1,
418
+ pool_type='thread',
419
+ recursive=True,
420
+ exclude_string=None,
421
+ include_string=None,
422
+ overwrite_handling='overwrite',
423
+ label_folder=None):
424
+ """
425
+ Converts a YOLO-formatted dataset to a COCO-formatted dataset.
426
+
427
+ All images will be assigned an "error" value, usually None.
428
+
429
+ Args:
430
+ input_folder (str): the YOLO dataset folder to convert. If the image and label
431
+ folders are different, this is the image folder, and [label_folder] is the
432
+ label folder.
433
+ class_name_file (str or list): a list of classes, a flat text file, or a yolo
434
+ dataset.yml/.yaml file. If it's a dataset.yml file, that file should point to
435
+ input_folder as the base folder, though this is not explicitly checked.
436
+ output_file (str, optional): .json file to which we should write COCO .json data
437
+ empty_image_handling (str, optional): how to handle images with no boxes; whether
438
+ this includes images with no .txt files depends on the value of
439
+ [allow_images_without_label_files]. Can be:
440
+
441
+ - 'no_annotations': include the image in the image list, with no annotations
442
+ - 'empty_annotations': include the image in the image list, and add an annotation without
443
+ any bounding boxes, using a category called [empty_image_category_name].
444
+ - 'skip': don't include the image in the image list
445
+ - 'error': there shouldn't be any empty images
446
+ empty_image_category_name (str, optional): if we're going to be inserting annotations for
447
+ images with no boxes, what category name should we use?
448
+ error_image_handling (str, optional): how to handle images that don't load properly; can
449
+ be:
450
+
451
+ - 'skip': don't include the image at all
452
+ - 'no_annotations': include with no annotations
453
+ allow_images_without_label_files (bool, optional): whether to silently allow images with
454
+ no label files (True) or raise errors for images with no label files (False)
455
+ n_workers (int, optional): number of concurrent workers, set to <= 1 to disable
456
+ parallelization
457
+ pool_type (str, optional): 'thread' or 'process', worker type to use for parallelization;
458
+ not used if [n_workers] <= 1
459
+ recursive (bool, optional): whether to recurse into [input_folder]
460
+ exclude_string (str, optional): exclude any images whose filename contains a string
461
+ include_string (str, optional): include only images whose filename contains a string
462
+ overwrite_handling (bool, optional): behavior if output_file exists ('load', 'overwrite', or
463
+ 'error')
464
+ label_folder (str, optional): label folder, if different from the image folder
465
+
466
+ Returns:
467
+ dict: COCO-formatted data, the same as what's written to [output_file]
468
+ """
469
+
470
+ ## Validate input
471
+
472
+ input_folder = input_folder.replace('\\','/')
473
+
474
+ assert os.path.isdir(input_folder)
475
+ if isinstance(class_name_file,str):
476
+ assert os.path.isfile(class_name_file)
477
+
478
+ assert empty_image_handling in \
479
+ ('no_annotations','empty_annotations','skip','error'), \
480
+ 'Unrecognized empty image handling spec: {}'.format(empty_image_handling)
481
+
482
+ if (output_file is not None) and os.path.isfile(output_file):
483
+
484
+ if overwrite_handling == 'overwrite':
485
+ print('Warning: output file {} exists, over-writing'.format(output_file))
486
+ elif overwrite_handling == 'load':
487
+ print('Output file {} exists, loading and returning'.format(output_file))
488
+ with open(output_file,'r') as f:
489
+ d = json.load(f)
490
+ return d
491
+ elif overwrite_handling == 'error':
492
+ raise ValueError('Output file {} exists'.format(output_file))
493
+ else:
494
+ raise ValueError('Unrecognized overwrite_handling value: {}'.format(overwrite_handling))
495
+
496
+
497
+ ## Read class names
498
+
499
+ category_id_to_name = load_yolo_class_list(class_name_file)
500
+
501
+
502
+ # Find or create the empty image category, if necessary
503
+ empty_category_id = None
504
+
505
+ if empty_image_handling == 'empty_annotations':
506
+ category_name_to_id = invert_dictionary(category_id_to_name)
507
+ if empty_image_category_name in category_name_to_id:
508
+ empty_category_id = category_name_to_id[empty_image_category_name]
509
+ print('Using existing empty image category with name {}, ID {}'.format(
510
+ empty_image_category_name,empty_category_id))
511
+ else:
512
+ empty_category_id = len(category_id_to_name)
513
+ print('Adding an empty category with name {}, ID {}'.format(
514
+ empty_image_category_name,empty_category_id))
515
+ category_id_to_name[empty_category_id] = empty_image_category_name
516
+
517
+
518
+ ## Enumerate images
519
+
520
+ print('Enumerating images...')
521
+
522
+ image_files_abs = find_images(input_folder,recursive=recursive,convert_slashes=True)
523
+ assert not any(['\\' in fn for fn in image_files_abs])
524
+
525
+ n_files_original = len(image_files_abs)
526
+
527
+ # Optionally include/exclude images matching specific strings
528
+ if exclude_string is not None:
529
+ image_files_abs = [fn for fn in image_files_abs if exclude_string not in fn]
530
+ if include_string is not None:
531
+ image_files_abs = [fn for fn in image_files_abs if include_string in fn]
532
+
533
+ if len(image_files_abs) != n_files_original or exclude_string is not None or include_string is not None:
534
+ n_excluded = n_files_original - len(image_files_abs)
535
+ print('Excluded {} of {} images based on filenames'.format(n_excluded,n_files_original))
536
+
537
+ categories = []
538
+
539
+ for category_id in category_id_to_name:
540
+ categories.append({'id':category_id,'name':category_id_to_name[category_id]})
541
+
542
+ info = {}
543
+ info['version'] = '1.0'
544
+ info['description'] = 'Converted from YOLO format'
545
+
546
+ image_ids = set()
547
+
548
+
549
+ ## If we're expected to have labels for every image, check before we process all the images
550
+
551
+ if not allow_images_without_label_files:
552
+ print('Verifying that label files exist')
553
+ # image_file_abs = image_files_abs[0]
554
+ for image_file_abs in tqdm(image_files_abs):
555
+ if label_folder is not None:
556
+ assert input_folder in image_file_abs
557
+ label_file_abs_base = image_file_abs.replace(input_folder,label_folder)
558
+ else:
559
+ label_file_abs_base = image_file_abs
560
+ label_file_abs = os.path.splitext(label_file_abs_base)[0] + '.txt'
561
+ assert os.path.isfile(label_file_abs), \
562
+ 'No annotation file for {}'.format(image_file_abs)
563
+
564
+
565
+ ## Initial loop to make sure image IDs will be unique
566
+
567
+ print('Validating image IDs...')
568
+
569
+ for fn_abs in tqdm(image_files_abs):
570
+
571
+ fn_relative = os.path.relpath(fn_abs,input_folder).replace('\\','/')
572
+ image_id = _filename_to_image_id(fn_relative)
573
+ assert image_id not in image_ids, \
574
+ 'Oops, you have hit a very esoteric case where you have the same filename ' + \
575
+ 'with both spaces and underscores, this is not currently handled.'
576
+ image_ids.add(image_id)
577
+
578
+
579
+ ## Main loop to process labels
580
+
581
+ print('Processing labels...')
582
+
583
+ if n_workers <= 1:
584
+
585
+ image_results = []
586
+ # fn_abs = image_files_abs[0]
587
+ for fn_abs in tqdm(image_files_abs):
588
+ image_results.append(_process_image(fn_abs,
589
+ input_folder,
590
+ category_id_to_name,
591
+ label_folder))
592
+
593
+ else:
594
+
595
+ assert pool_type in ('process','thread'), 'Illegal pool type {}'.format(pool_type)
596
+
597
+ pool = None
598
+ try:
599
+ if pool_type == 'thread':
600
+ pool = ThreadPool(n_workers)
601
+ else:
602
+ pool = Pool(n_workers)
603
+
604
+ print('Starting a {} pool of {} workers'.format(pool_type,n_workers))
605
+
606
+ p = partial(_process_image,
607
+ input_folder=input_folder,
608
+ category_id_to_name=category_id_to_name,
609
+ label_folder=label_folder)
610
+ image_results = list(tqdm(pool.imap(p, image_files_abs),
611
+ total=len(image_files_abs)))
612
+ finally:
613
+ if pool is not None:
614
+ pool.close()
615
+ pool.join()
616
+ print('Pool closed and joined for YOLO to COCO conversion')
617
+
618
+
619
+ assert len(image_results) == len(image_files_abs)
620
+
621
+
622
+ ## Re-assembly of results into a COCO dict
623
+
624
+ print('Assembling labels...')
625
+
626
+ images = []
627
+ annotations = []
628
+
629
+ for image_result in tqdm(image_results):
630
+
631
+ im = image_result[0]
632
+ annotations_this_image = image_result[1]
633
+
634
+ # If we have annotations for this image
635
+ if len(annotations_this_image) > 0:
636
+ assert im['error'] is None
637
+ images.append(im)
638
+ for ann in annotations_this_image:
639
+ annotations.append(ann)
640
+
641
+ # If this image failed to read
642
+ elif im['error'] is not None:
643
+
644
+ if error_image_handling == 'skip':
645
+ pass
646
+ elif error_image_handling == 'no_annotations':
647
+ images.append(im)
648
+
649
+ # If this image read successfully, but there are no annotations
650
+ else:
651
+
652
+ if empty_image_handling == 'skip':
653
+ pass
654
+ elif empty_image_handling == 'no_annotations':
655
+ images.append(im)
656
+ elif empty_image_handling == 'empty_annotations':
657
+ assert empty_category_id is not None
658
+ ann = {}
659
+ ann['id'] = im['id'] + '_0'
660
+ ann['image_id'] = im['id']
661
+ ann['category_id'] = empty_category_id
662
+ ann['sequence_level_annotation'] = False
663
+ # This would also be a reasonable thing to do, but it's not the convention
664
+ # we're adopting.
665
+ # ann['bbox'] = [0,0,0,0]
666
+ annotations.append(ann)
667
+ images.append(im)
668
+
669
+ # ...for each image result
670
+
671
+ # Clean up unnecessary error fields
672
+ for im in images:
673
+ if 'error' in im and im['error'] is None:
674
+ del im['error']
675
+
676
+ print('Read {} annotations for {} images'.format(len(annotations),
677
+ len(images)))
678
+
679
+ d = {}
680
+ d['images'] = images
681
+ d['annotations'] = annotations
682
+ d['categories'] = categories
683
+ d['info'] = info
684
+
685
+ if output_file is not None:
686
+ print('Writing to {}'.format(output_file))
687
+ write_json(output_file,d)
688
+
689
+ return d
690
+
691
+ # ...def yolo_to_coco()
692
+
693
+
694
+ #%% Interactive driver
695
+
696
if False:

    # Scratch cells, never executed on import; the "#%%" markers split this
    # block into cells (presumably run by hand in an IDE - confirm).
    pass

    #%% Convert YOLO folders to COCO

    # Both splits below use the same class list
    class_name_file = '/home/user/data/noaa-fish/AllImagesWithAnnotations/classes.txt'
    preview_folder = '/home/user/data/noaa-fish/val-coco-conversion-preview'

    # Validation split
    input_folder = '/home/user/data/noaa-fish/val'
    output_file = '/home/user/data/noaa-fish/val.json'
    d = yolo_to_coco(input_folder, class_name_file, output_file)

    # Training split.  input_folder and output_file are reassigned here, so the
    # cells below operate on whichever split was converted last.
    input_folder = '/home/user/data/noaa-fish/train'
    output_file = '/home/user/data/noaa-fish/train.json'
    d = yolo_to_coco(input_folder, class_name_file, output_file)


    #%% Check DB integrity

    from megadetector.data_management.databases import integrity_check_json_db

    options = integrity_check_json_db.IntegrityCheckOptions()
    options.baseDir = input_folder
    options.bCheckImageSizes = False
    options.bCheckImageExistence = True
    options.bFindUnusedImages = True

    # The three return values are not needed here
    _, _, _ = integrity_check_json_db.integrity_check_json_db(output_file, options)


    #%% Preview some images

    from megadetector.visualization import visualize_db

    viz_options = visualize_db.DbVizOptions()
    viz_options.num_to_visualize = None
    viz_options.trim_to_images_with_bboxes = False
    viz_options.add_search_links = False
    viz_options.sort_by_filename = False
    viz_options.parallelize_rendering = True
    viz_options.include_filename_links = True

    html_output_file, _ = visualize_db.visualize_db(
        db_path=output_file,
        output_dir=preview_folder,
        image_base_dir=input_folder,
        options=viz_options)

    from megadetector.utils.path_utils import open_file
    open_file(html_output_file)
748
+
749
+
750
+ #%% Command-line driver
751
+
752
def main():
    """
    Command-line driver for YOLO to COCO conversion.

    Builds the argument parser, reads arguments from sys.argv, and forwards them
    to yolo_to_coco().  If no arguments are supplied, prints the help text and
    exits.

    The boolean-like options (--allow_images_without_label_files, --recursive)
    take the strings "true" or "false"; they are matched case-insensitively, so
    "True" and "FALSE" are accepted as well.
    """

    parser = argparse.ArgumentParser(
        description='Convert a YOLO-formatted dataset to COCO format'
    )
    parser.add_argument(
        'input_folder',
        type=str,
        help='Path to the YOLO dataset folder (image folder)'
    )
    parser.add_argument(
        'class_name_file',
        type=str,
        help='Path to the file containing class names (e.g., classes.txt or dataset.yaml)'
    )
    parser.add_argument(
        'output_file',
        type=str,
        help='Path to the output COCO .json file.'
    )
    parser.add_argument(
        '--label_folder',
        type=str,
        default=None,
        help='Label folder, if different from the image folder. Default: None (labels are in the image folder)'
    )
    parser.add_argument(
        '--empty_image_handling',
        type=str,
        default='no_annotations',
        choices=['no_annotations', 'empty_annotations', 'skip', 'error'],
        help='How to handle images with no bounding boxes.'
    )
    parser.add_argument(
        '--empty_image_category_name',
        type=str,
        default='empty',
        help='Category name for empty images if empty_image_handling is "empty_annotations"'
    )
    parser.add_argument(
        '--error_image_handling',
        type=str,
        default='no_annotations',
        choices=['skip', 'no_annotations'],
        help='How to handle images that fail to load'
    )
    # argparse applies "type" before it checks "choices", so lower-casing in the
    # type callable is what makes "True"/"FALSE" pass the choices check.  Doing
    # the .lower() after parse_args() would be too late: mixed-case values would
    # already have been rejected.
    parser.add_argument(
        '--allow_images_without_label_files',
        type=str.lower,
        default='true',
        choices=['true', 'false'],
        help='Whether to allow images that do not have corresponding label files (true/false)'
    )
    parser.add_argument(
        '--n_workers',
        type=int,
        default=1,
        help='Number of workers for parallel processing. <=1 for sequential'
    )
    parser.add_argument(
        '--pool_type',
        type=str,
        default='thread',
        choices=['thread', 'process'],
        help='Type of multiprocessing pool if n_workers > 1'
    )
    # Case-insensitive for the same reason as --allow_images_without_label_files
    parser.add_argument(
        '--recursive',
        type=str.lower,
        default='true',
        choices=['true', 'false'],
        help='Whether to search for images recursively in the input folder (true/false)'
    )
    parser.add_argument(
        '--exclude_string',
        type=str,
        default=None,
        help='Exclude images whose filename contains this string'
    )
    parser.add_argument(
        '--include_string',
        type=str,
        default=None,
        help='Include images only if filename contains this string'
    )
    parser.add_argument(
        '--overwrite_handling',
        type=str,
        default='overwrite',
        choices=['load', 'overwrite', 'error'],
        help='Behavior if output_file exists.'
    )

    # With no arguments at all, show usage rather than an argparse error
    if len(sys.argv[1:]) == 0:
        parser.print_help()
        parser.exit()

    args = parser.parse_args()

    # Values are already lower-cased by the type callable above
    parsed_allow_images = (args.allow_images_without_label_files == 'true')
    parsed_recursive = (args.recursive == 'true')

    yolo_to_coco(
        args.input_folder,
        args.class_name_file,
        output_file=args.output_file,
        label_folder=args.label_folder,
        empty_image_handling=args.empty_image_handling,
        empty_image_category_name=args.empty_image_category_name,
        error_image_handling=args.error_image_handling,
        allow_images_without_label_files=parsed_allow_images,
        n_workers=args.n_workers,
        pool_type=args.pool_type,
        recursive=parsed_recursive,
        exclude_string=args.exclude_string,
        include_string=args.include_string,
        overwrite_handling=args.overwrite_handling
    )
    print(f"Dataset conversion complete, output written to {args.output_file}")
874
+
875
# Run the command-line driver only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()