megadetector 5.0.7__py3-none-any.whl → 5.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (191) hide show
  1. api/__init__.py +0 -0
  2. api/batch_processing/__init__.py +0 -0
  3. api/batch_processing/api_core/__init__.py +0 -0
  4. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. api/batch_processing/api_core/batch_service/score.py +0 -1
  6. api/batch_processing/api_core/server_job_status_table.py +0 -1
  7. api/batch_processing/api_core_support/__init__.py +0 -0
  8. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
  9. api/batch_processing/api_support/__init__.py +0 -0
  10. api/batch_processing/api_support/summarize_daily_activity.py +0 -1
  11. api/batch_processing/data_preparation/__init__.py +0 -0
  12. api/batch_processing/data_preparation/manage_local_batch.py +93 -79
  13. api/batch_processing/data_preparation/manage_video_batch.py +8 -8
  14. api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
  15. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
  16. api/batch_processing/postprocessing/__init__.py +0 -0
  17. api/batch_processing/postprocessing/add_max_conf.py +12 -12
  18. api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
  19. api/batch_processing/postprocessing/combine_api_outputs.py +69 -55
  20. api/batch_processing/postprocessing/compare_batch_results.py +114 -44
  21. api/batch_processing/postprocessing/convert_output_format.py +62 -19
  22. api/batch_processing/postprocessing/load_api_results.py +17 -20
  23. api/batch_processing/postprocessing/md_to_coco.py +31 -21
  24. api/batch_processing/postprocessing/md_to_labelme.py +165 -68
  25. api/batch_processing/postprocessing/merge_detections.py +40 -15
  26. api/batch_processing/postprocessing/postprocess_batch_results.py +270 -186
  27. api/batch_processing/postprocessing/remap_detection_categories.py +170 -0
  28. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +75 -39
  29. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
  30. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
  31. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +244 -160
  32. api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
  33. api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
  34. api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
  35. api/synchronous/__init__.py +0 -0
  36. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  37. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
  38. api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
  39. api/synchronous/api_core/animal_detection_api/config.py +35 -35
  40. api/synchronous/api_core/tests/__init__.py +0 -0
  41. api/synchronous/api_core/tests/load_test.py +109 -109
  42. classification/__init__.py +0 -0
  43. classification/aggregate_classifier_probs.py +21 -24
  44. classification/analyze_failed_images.py +11 -13
  45. classification/cache_batchapi_outputs.py +51 -51
  46. classification/create_classification_dataset.py +69 -68
  47. classification/crop_detections.py +54 -53
  48. classification/csv_to_json.py +97 -100
  49. classification/detect_and_crop.py +105 -105
  50. classification/evaluate_model.py +43 -42
  51. classification/identify_mislabeled_candidates.py +47 -46
  52. classification/json_to_azcopy_list.py +10 -10
  53. classification/json_validator.py +72 -71
  54. classification/map_classification_categories.py +44 -43
  55. classification/merge_classification_detection_output.py +68 -68
  56. classification/prepare_classification_script.py +157 -154
  57. classification/prepare_classification_script_mc.py +228 -228
  58. classification/run_classifier.py +27 -26
  59. classification/save_mislabeled.py +30 -30
  60. classification/train_classifier.py +20 -20
  61. classification/train_classifier_tf.py +21 -22
  62. classification/train_utils.py +10 -10
  63. data_management/__init__.py +0 -0
  64. data_management/annotations/__init__.py +0 -0
  65. data_management/annotations/annotation_constants.py +18 -31
  66. data_management/camtrap_dp_to_coco.py +238 -0
  67. data_management/cct_json_utils.py +107 -59
  68. data_management/cct_to_md.py +176 -158
  69. data_management/cct_to_wi.py +247 -219
  70. data_management/coco_to_labelme.py +272 -0
  71. data_management/coco_to_yolo.py +86 -62
  72. data_management/databases/__init__.py +0 -0
  73. data_management/databases/add_width_and_height_to_db.py +20 -16
  74. data_management/databases/combine_coco_camera_traps_files.py +35 -31
  75. data_management/databases/integrity_check_json_db.py +130 -83
  76. data_management/databases/subset_json_db.py +25 -16
  77. data_management/generate_crops_from_cct.py +27 -45
  78. data_management/get_image_sizes.py +188 -144
  79. data_management/importers/add_nacti_sizes.py +8 -8
  80. data_management/importers/add_timestamps_to_icct.py +78 -78
  81. data_management/importers/animl_results_to_md_results.py +158 -160
  82. data_management/importers/auckland_doc_test_to_json.py +9 -9
  83. data_management/importers/auckland_doc_to_json.py +8 -8
  84. data_management/importers/awc_to_json.py +7 -7
  85. data_management/importers/bellevue_to_json.py +15 -15
  86. data_management/importers/cacophony-thermal-importer.py +13 -13
  87. data_management/importers/carrizo_shrubfree_2018.py +8 -8
  88. data_management/importers/carrizo_trail_cam_2017.py +8 -8
  89. data_management/importers/cct_field_adjustments.py +9 -9
  90. data_management/importers/channel_islands_to_cct.py +10 -10
  91. data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
  92. data_management/importers/ena24_to_json.py +7 -7
  93. data_management/importers/filenames_to_json.py +8 -8
  94. data_management/importers/helena_to_cct.py +7 -7
  95. data_management/importers/idaho-camera-traps.py +7 -7
  96. data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
  97. data_management/importers/jb_csv_to_json.py +9 -9
  98. data_management/importers/mcgill_to_json.py +8 -8
  99. data_management/importers/missouri_to_json.py +18 -18
  100. data_management/importers/nacti_fieldname_adjustments.py +10 -10
  101. data_management/importers/noaa_seals_2019.py +8 -8
  102. data_management/importers/pc_to_json.py +7 -7
  103. data_management/importers/plot_wni_giraffes.py +7 -7
  104. data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
  105. data_management/importers/prepare_zsl_imerit.py +7 -7
  106. data_management/importers/rspb_to_json.py +8 -8
  107. data_management/importers/save_the_elephants_survey_A.py +8 -8
  108. data_management/importers/save_the_elephants_survey_B.py +9 -9
  109. data_management/importers/snapshot_safari_importer.py +26 -26
  110. data_management/importers/snapshot_safari_importer_reprise.py +665 -665
  111. data_management/importers/snapshot_serengeti_lila.py +14 -14
  112. data_management/importers/sulross_get_exif.py +8 -9
  113. data_management/importers/timelapse_csv_set_to_json.py +11 -11
  114. data_management/importers/ubc_to_json.py +13 -13
  115. data_management/importers/umn_to_json.py +7 -7
  116. data_management/importers/wellington_to_json.py +8 -8
  117. data_management/importers/wi_to_json.py +9 -9
  118. data_management/importers/zamba_results_to_md_results.py +181 -181
  119. data_management/labelme_to_coco.py +309 -159
  120. data_management/labelme_to_yolo.py +103 -60
  121. data_management/lila/__init__.py +0 -0
  122. data_management/lila/add_locations_to_island_camera_traps.py +9 -9
  123. data_management/lila/add_locations_to_nacti.py +147 -147
  124. data_management/lila/create_lila_blank_set.py +114 -31
  125. data_management/lila/create_lila_test_set.py +8 -8
  126. data_management/lila/create_links_to_md_results_files.py +106 -106
  127. data_management/lila/download_lila_subset.py +92 -90
  128. data_management/lila/generate_lila_per_image_labels.py +56 -43
  129. data_management/lila/get_lila_annotation_counts.py +18 -15
  130. data_management/lila/get_lila_image_counts.py +11 -11
  131. data_management/lila/lila_common.py +103 -70
  132. data_management/lila/test_lila_metadata_urls.py +132 -116
  133. data_management/ocr_tools.py +173 -128
  134. data_management/read_exif.py +161 -99
  135. data_management/remap_coco_categories.py +84 -0
  136. data_management/remove_exif.py +58 -62
  137. data_management/resize_coco_dataset.py +32 -44
  138. data_management/wi_download_csv_to_coco.py +246 -0
  139. data_management/yolo_output_to_md_output.py +86 -73
  140. data_management/yolo_to_coco.py +535 -95
  141. detection/__init__.py +0 -0
  142. detection/detector_training/__init__.py +0 -0
  143. detection/process_video.py +85 -33
  144. detection/pytorch_detector.py +43 -25
  145. detection/run_detector.py +157 -72
  146. detection/run_detector_batch.py +189 -114
  147. detection/run_inference_with_yolov5_val.py +118 -51
  148. detection/run_tiled_inference.py +113 -42
  149. detection/tf_detector.py +51 -28
  150. detection/video_utils.py +606 -521
  151. docs/source/conf.py +43 -0
  152. md_utils/__init__.py +0 -0
  153. md_utils/azure_utils.py +9 -9
  154. md_utils/ct_utils.py +249 -70
  155. md_utils/directory_listing.py +59 -64
  156. md_utils/md_tests.py +968 -862
  157. md_utils/path_utils.py +655 -155
  158. md_utils/process_utils.py +157 -133
  159. md_utils/sas_blob_utils.py +20 -20
  160. md_utils/split_locations_into_train_val.py +45 -32
  161. md_utils/string_utils.py +33 -10
  162. md_utils/url_utils.py +208 -27
  163. md_utils/write_html_image_list.py +51 -35
  164. md_visualization/__init__.py +0 -0
  165. md_visualization/plot_utils.py +102 -109
  166. md_visualization/render_images_with_thumbnails.py +34 -34
  167. md_visualization/visualization_utils.py +908 -311
  168. md_visualization/visualize_db.py +109 -58
  169. md_visualization/visualize_detector_output.py +61 -42
  170. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/METADATA +21 -17
  171. megadetector-5.0.9.dist-info/RECORD +224 -0
  172. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/WHEEL +1 -1
  173. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
  174. taxonomy_mapping/__init__.py +0 -0
  175. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
  176. taxonomy_mapping/map_new_lila_datasets.py +154 -154
  177. taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
  178. taxonomy_mapping/preview_lila_taxonomy.py +591 -591
  179. taxonomy_mapping/retrieve_sample_image.py +12 -12
  180. taxonomy_mapping/simple_image_download.py +11 -11
  181. taxonomy_mapping/species_lookup.py +10 -10
  182. taxonomy_mapping/taxonomy_csv_checker.py +18 -18
  183. taxonomy_mapping/taxonomy_graph.py +47 -47
  184. taxonomy_mapping/validate_lila_category_mappings.py +83 -76
  185. data_management/cct_json_to_filename_json.py +0 -89
  186. data_management/cct_to_csv.py +0 -140
  187. data_management/databases/remove_corrupted_images_from_db.py +0 -191
  188. detection/detector_training/copy_checkpoints.py +0 -43
  189. md_visualization/visualize_megadb.py +0 -183
  190. megadetector-5.0.7.dist-info/RECORD +0 -202
  191. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0
@@ -1,50 +1,165 @@
1
- ########
2
- #
3
- # yolo_to_coco.py
4
- #
5
- # Converts a YOLO-formatted dataset to a COCO-formatted dataset.
6
- #
7
- # Currently supports only a single folder (i.e., no recursion). Treats images without
8
- # corresponding .txt files as empty.
9
- #
10
- ########
1
+ """
2
+
3
+ yolo_to_coco.py
4
+
5
+ Converts a folder of YOLO-formatted annotation files to a COCO-formatted dataset.
6
+
7
+ """
11
8
 
12
9
  #%% Imports and constants
13
10
 
14
11
  import json
15
12
  import os
16
13
 
17
- from PIL import Image
14
+ from multiprocessing.pool import ThreadPool
15
+ from multiprocessing.pool import Pool
16
+ from functools import partial
17
+
18
18
  from tqdm import tqdm
19
19
 
20
20
  from md_utils.path_utils import find_images
21
+ from md_utils.path_utils import recursive_file_list
22
+ from md_utils.path_utils import find_image_strings
23
+ from md_utils.ct_utils import invert_dictionary
24
+ from md_visualization.visualization_utils import open_image
21
25
  from data_management.yolo_output_to_md_output import read_classes_from_yolo_dataset_file
22
26
 
23
27
 
24
- #%% Main conversion function
28
+ #%% Support functions
25
29
 
26
- def yolo_to_coco(input_folder,class_name_file,output_file=None):
30
+ def _filename_to_image_id(fn):
27
31
  """
28
- Convert the YOLO-formatted data in [input_folder] to a COCO-formatted dictionary,
29
- reading class names from [class_name_file], which can be a flat list with a .txt
30
- extension or a YOLO dataset.yml file. Optionally writes the output dataset to [output_file].
31
-
32
- Returns a COCO-formatted dictionary.
32
+ Image IDs can't have spaces in them; replace spaces with underscores.
33
+ """
34
+ return fn.replace(' ','_')
35
+
36
+
37
+ def _process_image(fn_abs,input_folder,category_id_to_name):
38
+ """
39
+ Internal support function for processing one image's labels.
33
40
  """
34
41
 
35
- # Validate input
42
+ # Create the image object for this image
43
+ fn_relative = os.path.relpath(fn_abs,input_folder)
44
+ image_id = _filename_to_image_id(fn_relative)
36
45
 
37
- assert os.path.isdir(input_folder)
38
- assert os.path.isfile(class_name_file)
46
+ # This is done in a separate loop now
47
+ #
48
+ # assert image_id not in image_ids, \
49
+ # 'Oops, you have hit a very esoteric case where you have the same filename ' + \
50
+ # 'with both spaces and underscores, this is not currently handled.'
51
+ # image_ids.add(image_id)
52
+
53
+ im = {}
54
+ im['file_name'] = fn_relative
55
+ im['id'] = image_id
56
+
57
+ annotations_this_image = []
58
+
59
+ try:
60
+ pil_im = open_image(fn_abs)
61
+ im_width, im_height = pil_im.size
62
+ im['width'] = im_width
63
+ im['height'] = im_height
64
+ im['error'] = None
65
+ except Exception as e:
66
+ print('Warning: error reading {}:\n{}'.format(fn_relative,str(e)))
67
+ im['width'] = -1
68
+ im['height'] = -1
69
+ im['error'] = str(e)
70
+ return (im,annotations_this_image)
71
+
72
+ # Is there an annotation file for this image?
73
+ annotation_file = os.path.splitext(fn_abs)[0] + '.txt'
74
+ if not os.path.isfile(annotation_file):
75
+ annotation_file = os.path.splitext(fn_abs)[0] + '.TXT'
76
+
77
+ if os.path.isfile(annotation_file):
78
+
79
+ with open(annotation_file,'r') as f:
80
+ lines = f.readlines()
81
+ lines = [s.strip() for s in lines]
82
+
83
+ # s = lines[0]
84
+ annotation_number = 0
85
+
86
+ for s in lines:
87
+
88
+ if len(s.strip()) == 0:
89
+ continue
90
+
91
+ tokens = s.split()
92
+ assert len(tokens) == 5
93
+ category_id = int(tokens[0])
94
+ assert category_id in category_id_to_name, \
95
+ 'Unrecognized category ID {} in annotation file {}'.format(
96
+ category_id,annotation_file)
97
+ ann = {}
98
+ ann['id'] = im['id'] + '_' + str(annotation_number)
99
+ ann['image_id'] = im['id']
100
+ ann['category_id'] = category_id
101
+ ann['sequence_level_annotation'] = False
102
+
103
+ # COCO: [x_min, y_min, width, height] in absolute coordinates
104
+ # YOLO: [class, x_center, y_center, width, height] in normalized coordinates
105
+
106
+ yolo_bbox = [float(x) for x in tokens[1:]]
107
+
108
+ normalized_x_center = yolo_bbox[0]
109
+ normalized_y_center = yolo_bbox[1]
110
+ normalized_width = yolo_bbox[2]
111
+ normalized_height = yolo_bbox[3]
112
+
113
+ absolute_x_center = normalized_x_center * im_width
114
+ absolute_y_center = normalized_y_center * im_height
115
+ absolute_width = normalized_width * im_width
116
+ absolute_height = normalized_height * im_height
117
+ absolute_x_min = absolute_x_center - absolute_width / 2
118
+ absolute_y_min = absolute_y_center - absolute_height / 2
119
+
120
+ coco_bbox = [absolute_x_min, absolute_y_min, absolute_width, absolute_height]
121
+
122
+ ann['bbox'] = coco_bbox
123
+ annotation_number += 1
124
+
125
+ annotations_this_image.append(ann)
126
+
127
+ # ...for each annotation
128
+
129
+ # ...if this image has annotations
39
130
 
131
+ return (im,annotations_this_image)
132
+
133
+ # ...def _process_image(...)
134
+
135
+
136
+ def load_yolo_class_list(class_name_file):
137
+ """
138
+ Loads a dictionary mapping zero-indexed IDs to class names from the text/yaml file
139
+ [class_name_file].
40
140
 
41
- # Read class names
141
+ Args:
142
+ class_name_file (str or list): this can be:
143
+ - a .yml or .yaml file in YOLO's dataset.yaml format
144
+ - a .txt or .data file containing a flat list of class names
145
+ - a list of class names
146
+
147
+ Returns:
148
+ dict: A dict mapping zero-indexed integer IDs to class names
149
+ """
42
150
 
151
+ # class_name_file can also be a list of class names
152
+ if isinstance(class_name_file,list):
153
+ category_id_to_name = {}
154
+ for i_name,name in enumerate(class_name_file):
155
+ category_id_to_name[i_name] = name
156
+ return category_id_to_name
157
+
43
158
  ext = os.path.splitext(class_name_file)[1][1:]
44
- assert ext in ('yml','txt','yaml'), 'Unrecognized class name file type {}'.format(
159
+ assert ext in ('yml','txt','yaml','data'), 'Unrecognized class name file type {}'.format(
45
160
  class_name_file)
46
161
 
47
- if ext == 'txt':
162
+ if ext in ('txt','data'):
48
163
 
49
164
  with open(class_name_file,'r') as f:
50
165
  lines = f.readlines()
@@ -70,14 +185,302 @@ def yolo_to_coco(input_folder,class_name_file,output_file=None):
70
185
 
71
186
  assert ext in ('yml','yaml')
72
187
  category_id_to_name = read_classes_from_yolo_dataset_file(class_name_file)
188
+
189
+ return category_id_to_name
190
+
191
+ # ...load_yolo_class_list(...)
192
+
193
+
194
+ def validate_label_file(label_file,category_id_to_name=None,verbose=False):
195
+ """
196
+ Verifies that [label_file] is a valid YOLO label file. Does not check the extension.
197
+
198
+ Args:
199
+ label_file (str): the .txt file to validate
200
+ category_id_to_name (dict, optional): a dict mapping integer category IDs to names;
201
+ if this is not None, this function errors if the file uses a category that's not
202
+ in this dict
203
+ verbose (bool, optional): enable additional debug console output
204
+
205
+ Returns:
206
+ dict: a dict with keys 'file' (the same as [label_file]) and 'errors' (a list of
207
+ errors (if any) that we found in this file)
208
+ """
209
+
210
+ label_result = {}
211
+ label_result['file'] = label_file
212
+ label_result['errors'] = []
213
+
214
+ try:
215
+ with open(label_file,'r') as f:
216
+ lines = f.readlines()
217
+ except Exception as e:
218
+ label_result['errors'].append('Read error: {}'.format(str(e)))
219
+ return label_result
220
+
221
+ # i_line = 0; line = lines[i_line]
222
+ for i_line,line in enumerate(lines):
223
+ s = line.strip()
224
+ if len(s) == 0 or s[0] == '#':
225
+ continue
226
+
227
+ try:
228
+
229
+ tokens = s.split()
230
+ assert len(tokens) == 5, '{} tokens'.format(len(tokens))
231
+
232
+ if category_id_to_name is not None:
233
+ category_id = int(tokens[0])
234
+ assert category_id in category_id_to_name, \
235
+ 'Unrecognized category ID {}'.format(category_id)
236
+
237
+ yolo_bbox = [float(x) for x in tokens[1:]]
73
238
 
239
+ except Exception as e:
240
+ label_result['errors'].append('Token error at line {}: {}'.format(i_line,str(e)))
241
+ continue
242
+
243
+ normalized_x_center = yolo_bbox[0]
244
+ normalized_y_center = yolo_bbox[1]
245
+ normalized_width = yolo_bbox[2]
246
+ normalized_height = yolo_bbox[3]
247
+
248
+ normalized_x_min = normalized_x_center - normalized_width / 2.0
249
+ normalized_x_max = normalized_x_center + normalized_width / 2.0
250
+ normalized_y_min = normalized_y_center - normalized_height / 2.0
251
+ normalized_y_max = normalized_y_center + normalized_height / 2.0
74
252
 
75
- # Enumerate images
253
+ if normalized_x_min < 0 or normalized_y_min < 0 or \
254
+ normalized_x_max > 1 or normalized_y_max > 1:
255
+ label_result['errors'].append('Invalid bounding box: {} {} {} {}'.format(
256
+ normalized_x_min,normalized_y_min,normalized_x_max,normalized_y_max))
257
+
258
+ # ...for each line
259
+
260
+ if verbose:
261
+ if len(label_result['errors']) > 0:
262
+ print('Errors for {}:'.format(label_file))
263
+ for error in label_result['errors']:
264
+ print(error)
265
+
266
+ return label_result
76
267
 
77
- image_files = find_images(input_folder,recursive=False)
268
+ # ...def validate_label_file(...)
78
269
 
79
- images = []
80
- annotations = []
270
+
271
+ def validate_yolo_dataset(input_folder, class_name_file, n_workers=1, pool_type='thread', verbose=False):
272
+ """
273
+ Verifies all the labels in a YOLO dataset folder.
274
+
275
+ Looks for:
276
+
277
+ * Image files without label files
278
+ * Text files without image files
279
+ * Illegal classes in label files
280
+ * Invalid boxes in label files
281
+
282
+ Args:
283
+ input_folder (str): the YOLO dataset folder to validate
284
+ class_name_file (str or list): a list of classes, a flat text file, or a yolo
285
+ dataset.yml/.yaml file. If it's a dataset.yml file, that file should point to
286
+ input_folder as the base folder, though this is not explicitly checked.
287
+ n_workers (int, optional): number of concurrent workers, set to <= 1 to disable
288
+ parallelization
289
+ pool_type (str, optional): 'thread' or 'process', worker type to use for parallelization;
290
+ not used if [n_workers] <= 1
291
+ verbose (bool, optional): enable additional debug console output
292
+
293
+ Returns:
294
+ dict: validation results, as a dict with fields:
295
+
296
+ - image_files_without_label_files (list)
297
+ - label_files_without_images (list)
298
+ - label_results (list of dicts with fields 'file', 'errors')
299
+ """
300
+
301
+ # Validate arguments
302
+ assert os.path.isdir(input_folder), 'Could not find input folder {}'.format(input_folder)
303
+ if n_workers > 1:
304
+ assert pool_type in ('thread','process'), 'Illegal pool type {}'.format(pool_type)
305
+
306
+ category_id_to_name = load_yolo_class_list(class_name_file)
307
+
308
+ print('Enumerating files in {}'.format(input_folder))
309
+
310
+ all_files = recursive_file_list(input_folder,recursive=True,return_relative_paths=False,
311
+ convert_slashes=True)
312
+ label_files = [fn for fn in all_files if fn.endswith('.txt')]
313
+ image_files = find_image_strings(all_files)
314
+ print('Found {} images files and {} label files in {}'.format(
315
+ len(image_files),len(label_files),input_folder))
316
+
317
+ label_files_set = set(label_files)
318
+
319
+ image_files_without_extension = set()
320
+ for fn in image_files:
321
+ image_file_without_extension = os.path.splitext(fn)[0]
322
+ assert image_file_without_extension not in image_files_without_extension, \
323
+ 'Duplicate image file, likely with different extensions: {}'.format(fn)
324
+ image_files_without_extension.add(image_file_without_extension)
325
+
326
+ print('Looking for missing image/label files')
327
+
328
+ image_files_without_label_files = []
329
+ label_files_without_images = []
330
+
331
+ for image_file in tqdm(image_files):
332
+ expected_label_file = os.path.splitext(image_file)[0] + '.txt'
333
+ if expected_label_file not in label_files_set:
334
+ image_files_without_label_files.append(image_file)
335
+
336
+ for label_file in tqdm(label_files):
337
+ expected_image_file_without_extension = os.path.splitext(label_file)[0]
338
+ if expected_image_file_without_extension not in image_files_without_extension:
339
+ label_files_without_images.append(label_file)
340
+
341
+ print('Found {} image files without labels, {} labels without images'.format(
342
+ len(image_files_without_label_files),len(label_files_without_images)))
343
+
344
+ print('Validating label files')
345
+
346
+ if n_workers <= 1:
347
+
348
+ label_results = []
349
+ for fn_abs in tqdm(label_files):
350
+ label_results.append(validate_label_file(fn_abs,
351
+ category_id_to_name=category_id_to_name,
352
+ verbose=verbose))
353
+
354
+ else:
355
+
356
+ assert pool_type in ('process','thread'), 'Illegal pool type {}'.format(pool_type)
357
+
358
+ if pool_type == 'thread':
359
+ pool = ThreadPool(n_workers)
360
+ else:
361
+ pool = Pool(n_workers)
362
+
363
+ print('Starting a {} pool of {} workers'.format(pool_type,n_workers))
364
+
365
+ p = partial(validate_label_file,
366
+ category_id_to_name=category_id_to_name,
367
+ verbose=verbose)
368
+ label_results = list(tqdm(pool.imap(p, label_files),
369
+ total=len(label_files)))
370
+
371
+ assert len(label_results) == len(label_files)
372
+
373
+ validation_results = {}
374
+ validation_results['image_files_without_label_files'] = image_files_without_label_files
375
+ validation_results['label_files_without_images'] = label_files_without_images
376
+ validation_results['label_results'] = label_results
377
+
378
+ return validation_results
379
+
380
+ # ...validate_yolo_dataset(...)
381
+
382
+
383
+ #%% Main conversion function
384
+
385
+ def yolo_to_coco(input_folder,
386
+ class_name_file,
387
+ output_file=None,
388
+ empty_image_handling='no_annotations',
389
+ empty_image_category_name='empty',
390
+ error_image_handling='no_annotations',
391
+ allow_images_without_label_files=True,
392
+ n_workers=1,
393
+ pool_type='thread',
394
+ recursive=True,
395
+ exclude_string=None,
396
+ include_string=None):
397
+ """
398
+ Converts a YOLO-formatted dataset to a COCO-formatted dataset.
399
+
400
+ All images will be assigned an "error" value, usually None.
401
+
402
+ Args:
403
+ input_folder (str): the YOLO dataset folder to convert
404
+ class_name_file (str or list): a list of classes, a flat text file, or a yolo
405
+ dataset.yml/.yaml file. If it's a dataset.yml file, that file should point to
406
+ input_folder as the base folder, though this is not explicitly checked.
407
+ output_file (str, optional): .json file to which we should write COCO .json data
408
+ empty_image_handling (str, optional): how to handle images with no boxes; whether
409
+ this includes images with no .txt files depends on the value of
410
+ [allow_images_without_label_files]. Can be:
411
+
412
+ - 'no_annotations': include the image in the image list, with no annotations
413
+ - 'empty_annotations': include the image in the image list, and add an annotation without
414
+ any bounding boxes, using a category called [empty_image_category_name].
415
+ - 'skip': don't include the image in the image list
416
+ - 'error': there shouldn't be any empty images
417
+ error_image_handling (str, optional): how to handle images that don't load properly; can
418
+ be:
419
+
420
+ - 'skip': don't include the image at all
421
+ - 'no_annotations': include with no annotations
422
+
423
+ n_workers (int, optional): number of concurrent workers, set to <= 1 to disable
424
+ parallelization
425
+ pool_type (str, optional): 'thread' or 'process', worker type to use for parallelization;
426
+ not used if [n_workers] <= 1
427
+ recursive (bool, optional): whether to recurse into [input_folder]
428
+ exclude_string (str, optional): exclude any images whose filename contains this string
429
+ include_string (str, optional): include only images whose filename contains this string
430
+
431
+ Returns:
432
+ dict: COCO-formatted data, the same as what's written to [output_file]
433
+ """
434
+
435
+ ## Validate input
436
+
437
+ assert os.path.isdir(input_folder)
438
+ assert os.path.isfile(class_name_file)
439
+
440
+ assert empty_image_handling in \
441
+ ('no_annotations','empty_annotations','skip','error'), \
442
+ 'Unrecognized empty image handling spec: {}'.format(empty_image_handling)
443
+
444
+
445
+ ## Read class names
446
+
447
+ category_id_to_name = load_yolo_class_list(class_name_file)
448
+
449
+
450
+ # Find or create the empty image category, if necessary
451
+ empty_category_id = None
452
+
453
+ if (empty_image_handling == 'empty_annotations'):
454
+ category_name_to_id = invert_dictionary(category_id_to_name)
455
+ if empty_image_category_name in category_name_to_id:
456
+ empty_category_id = category_name_to_id[empty_image_category_name]
457
+ print('Using existing empty image category with name {}, ID {}'.format(
458
+ empty_image_category_name,empty_category_id))
459
+ else:
460
+ empty_category_id = len(category_id_to_name)
461
+ print('Adding an empty category with name {}, ID {}'.format(
462
+ empty_image_category_name,empty_category_id))
463
+ category_id_to_name[empty_category_id] = empty_image_category_name
464
+
465
+
466
+ ## Enumerate images
467
+
468
+ print('Enumerating images...')
469
+
470
+ image_files_abs = find_images(input_folder,recursive=recursive,convert_slashes=True)
471
+
472
+ n_files_original = len(image_files_abs)
473
+
474
+ # Optionally include/exclude images matching specific strings
475
+ if exclude_string is not None:
476
+ image_files_abs = [fn for fn in image_files_abs if exclude_string not in fn]
477
+ if include_string is not None:
478
+ image_files_abs = [fn for fn in image_files_abs if include_string in fn]
479
+
480
+ if len(image_files_abs) != n_files_original or exclude_string is not None or include_string is not None:
481
+ n_excluded = n_files_original - len(image_files_abs)
482
+ print('Excluded {} of {} images based on filenames'.format(n_excluded,n_files_original))
483
+
81
484
  categories = []
82
485
 
83
486
  for category_id in category_id_to_name:
@@ -87,79 +490,111 @@ def yolo_to_coco(input_folder,class_name_file,output_file=None):
87
490
  info['version'] = '1.0'
88
491
  info['description'] = 'Converted from YOLO format'
89
492
 
90
- # fn = image_files[0]
91
- for fn in tqdm(image_files):
92
-
93
- im = Image.open(fn)
94
- im_width, im_height = im.size
95
-
96
- # Create the image object for this image
97
- im = {}
98
- fn_relative = os.path.relpath(fn,input_folder)
99
- im['file_name'] = fn_relative
100
- im['id'] = fn_relative.replace(' ','_')
101
- im['location'] = 'unknown'
102
- images.append(im)
103
-
104
- # Is there an annotation file for this image?
105
- annotation_file = os.path.splitext(fn)[0] + '.txt'
106
- if not os.path.isfile(annotation_file):
107
- annotation_file = os.path.splitext(fn)[0] + '.TXT'
108
- if not os.path.isfile(annotation_file):
109
- # This is an image with no annotations, currently don't do anything special
110
- # here
111
- pass
493
+ image_ids = set()
494
+
495
+
496
+ ## If we're expected to have labels for every image, check before we process all the images
497
+
498
+ if not allow_images_without_label_files:
499
+ print('Verifying that label files exist')
500
+ for image_file_abs in tqdm(image_files_abs):
501
+ label_file_abs = os.path.splitext(image_file_abs)[0] + '.txt'
502
+ assert os.path.isfile(label_file_abs), \
503
+ 'No annotation file for {}'.format(image_file_abs)
504
+
505
+
506
+ ## Initial loop to make sure image IDs will be unique
507
+
508
+ print('Validating image IDs...')
509
+
510
+ for fn_abs in tqdm(image_files_abs):
511
+
512
+ fn_relative = os.path.relpath(fn_abs,input_folder)
513
+ image_id = _filename_to_image_id(fn_relative)
514
+ assert image_id not in image_ids, \
515
+ 'Oops, you have hit a very esoteric case where you have the same filename ' + \
516
+ 'with both spaces and underscores, this is not currently handled.'
517
+ image_ids.add(image_id)
518
+
519
+
520
+ ## Main loop to process labels
521
+
522
+ print('Processing labels...')
523
+
524
+ if n_workers <= 1:
525
+
526
+ image_results = []
527
+ for fn_abs in tqdm(image_files_abs):
528
+ image_results.append(_process_image(fn_abs,input_folder,category_id_to_name))
529
+
530
+ else:
531
+
532
+ assert pool_type in ('process','thread'), 'Illegal pool type {}'.format(pool_type)
533
+
534
+ if pool_type == 'thread':
535
+ pool = ThreadPool(n_workers)
112
536
  else:
113
- with open(annotation_file,'r') as f:
114
- lines = f.readlines()
115
- lines = [s.strip() for s in lines]
116
-
117
- # s = lines[0]
118
- annotation_number = 0
119
- for s in lines:
120
- if len(s.strip()) == 0:
121
- continue
122
- tokens = s.split()
123
- assert len(tokens) == 5
124
- category_id = int(tokens[0])
125
- assert category_id in category_id_to_name, \
126
- 'Unrecognized category ID {} in annotation file {}'.format(
127
- category_id,annotation_file)
128
- ann = {}
129
- ann['id'] = im['id'] + '_' + str(annotation_number)
130
- ann['image_id'] = im['id']
131
- ann['category_id'] = category_id
132
- ann['sequence_level_annotation'] = False
133
-
134
- # COCO: [x_min, y_min, width, height] in absolute coordinates
135
- # YOLO: [class, x_center, y_center, width, height] in normalized coordinates
136
-
137
- yolo_bbox = [float(x) for x in tokens[1:]]
138
-
139
- normalized_x_center = yolo_bbox[0]
140
- normalized_y_center = yolo_bbox[1]
141
- normalized_width = yolo_bbox[2]
142
- normalized_height = yolo_bbox[3]
143
-
144
- absolute_x_center = normalized_x_center * im_width
145
- absolute_y_center = normalized_y_center * im_height
146
- absolute_width = normalized_width * im_width
147
- absolute_height = normalized_height * im_height
148
- absolute_x_min = absolute_x_center - absolute_width / 2
149
- absolute_y_min = absolute_y_center - absolute_height / 2
150
-
151
- coco_bbox = [absolute_x_min, absolute_y_min, absolute_width, absolute_height]
537
+ pool = Pool(n_workers)
538
+
539
+ print('Starting a {} pool of {} workers'.format(pool_type,n_workers))
540
+
541
+ p = partial(_process_image,input_folder=input_folder,
542
+ category_id_to_name=category_id_to_name)
543
+ image_results = list(tqdm(pool.imap(p, image_files_abs),
544
+ total=len(image_files_abs)))
152
545
 
153
- ann['bbox'] = coco_bbox
154
- annotation_number += 1
546
+
547
+ assert len(image_results) == len(image_files_abs)
548
+
549
+
550
+ ## Re-assembly of results into a COCO dict
551
+
552
+ print('Assembling labels...')
553
+
554
+ images = []
555
+ annotations = []
556
+
557
+ for image_result in tqdm(image_results):
558
+
559
+ im = image_result[0]
560
+ annotations_this_image = image_result[1]
561
+
562
+ # If we have annotations for this image
563
+ if len(annotations_this_image) > 0:
564
+ assert im['error'] is None
565
+ images.append(im)
566
+ for ann in annotations_this_image:
567
+ annotations.append(ann)
155
568
 
156
- annotations.append(ann)
569
+ # If this image failed to read
570
+ elif im['error'] is not None:
571
+
572
+ if error_image_handling == 'skip':
573
+ pass
574
+ elif error_image_handling == 'no_annotations':
575
+ images.append(im)
157
576
 
158
- # ...for each annotation
577
+ # If this image read successfully, but there are no annotations
578
+ else:
159
579
 
160
- # ...if this image has annotations
580
+ if empty_image_handling == 'skip':
581
+ pass
582
+ elif empty_image_handling == 'no_annotations':
583
+ images.append(im)
584
+ elif empty_image_handling == 'empty_annotations':
585
+ assert empty_category_id is not None
586
+ ann = {}
587
+ ann['id'] = im['id'] + '_0'
588
+ ann['image_id'] = im['id']
589
+ ann['category_id'] = empty_category_id
590
+ ann['sequence_level_annotation'] = False
591
+ # This would also be a reasonable thing to do, but it's not the convention
592
+ # we're adopting.
593
+ # ann['bbox'] = [0,0,0,0]
594
+ annotations.append(ann)
595
+ images.append(im)
161
596
 
162
- # ...for each image
597
+ # ...for each image result
163
598
 
164
599
  print('Read {} annotations for {} images'.format(len(annotations),
165
600
  len(images)))
@@ -234,3 +669,8 @@ if False:
234
669
 
235
670
  from md_utils.path_utils import open_file
236
671
  open_file(html_output_file)
672
+
673
+
674
+ #%% Command-line driver
675
+
676
+ # TODO
detection/__init__.py ADDED
File without changes
File without changes