megadetector 5.0.11__py3-none-any.whl → 5.0.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (201) hide show
  1. megadetector/api/__init__.py +0 -0
  2. megadetector/api/batch_processing/__init__.py +0 -0
  3. megadetector/api/batch_processing/api_core/__init__.py +0 -0
  4. megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. megadetector/api/batch_processing/api_core/batch_service/score.py +439 -0
  6. megadetector/api/batch_processing/api_core/server.py +294 -0
  7. megadetector/api/batch_processing/api_core/server_api_config.py +98 -0
  8. megadetector/api/batch_processing/api_core/server_app_config.py +55 -0
  9. megadetector/api/batch_processing/api_core/server_batch_job_manager.py +220 -0
  10. megadetector/api/batch_processing/api_core/server_job_status_table.py +152 -0
  11. megadetector/api/batch_processing/api_core/server_orchestration.py +360 -0
  12. megadetector/api/batch_processing/api_core/server_utils.py +92 -0
  13. megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
  14. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +46 -0
  15. megadetector/api/batch_processing/api_support/__init__.py +0 -0
  16. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +152 -0
  17. megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
  18. megadetector/api/batch_processing/integration/digiKam/setup.py +6 -0
  19. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +465 -0
  20. megadetector/api/batch_processing/integration/eMammal/test_scripts/config_template.py +5 -0
  21. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +126 -0
  22. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +55 -0
  23. megadetector/api/synchronous/__init__.py +0 -0
  24. megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  25. megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +152 -0
  26. megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -0
  27. megadetector/api/synchronous/api_core/animal_detection_api/config.py +35 -0
  28. megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
  29. megadetector/api/synchronous/api_core/tests/load_test.py +110 -0
  30. megadetector/classification/__init__.py +0 -0
  31. megadetector/classification/aggregate_classifier_probs.py +108 -0
  32. megadetector/classification/analyze_failed_images.py +227 -0
  33. megadetector/classification/cache_batchapi_outputs.py +198 -0
  34. megadetector/classification/create_classification_dataset.py +627 -0
  35. megadetector/classification/crop_detections.py +516 -0
  36. megadetector/classification/csv_to_json.py +226 -0
  37. megadetector/classification/detect_and_crop.py +855 -0
  38. megadetector/classification/efficientnet/__init__.py +9 -0
  39. megadetector/classification/efficientnet/model.py +415 -0
  40. megadetector/classification/efficientnet/utils.py +610 -0
  41. megadetector/classification/evaluate_model.py +520 -0
  42. megadetector/classification/identify_mislabeled_candidates.py +152 -0
  43. megadetector/classification/json_to_azcopy_list.py +63 -0
  44. megadetector/classification/json_validator.py +699 -0
  45. megadetector/classification/map_classification_categories.py +276 -0
  46. megadetector/classification/merge_classification_detection_output.py +506 -0
  47. megadetector/classification/prepare_classification_script.py +194 -0
  48. megadetector/classification/prepare_classification_script_mc.py +228 -0
  49. megadetector/classification/run_classifier.py +287 -0
  50. megadetector/classification/save_mislabeled.py +110 -0
  51. megadetector/classification/train_classifier.py +827 -0
  52. megadetector/classification/train_classifier_tf.py +725 -0
  53. megadetector/classification/train_utils.py +323 -0
  54. megadetector/data_management/__init__.py +0 -0
  55. megadetector/data_management/annotations/__init__.py +0 -0
  56. megadetector/data_management/annotations/annotation_constants.py +34 -0
  57. megadetector/data_management/camtrap_dp_to_coco.py +239 -0
  58. megadetector/data_management/cct_json_utils.py +395 -0
  59. megadetector/data_management/cct_to_md.py +176 -0
  60. megadetector/data_management/cct_to_wi.py +289 -0
  61. megadetector/data_management/coco_to_labelme.py +272 -0
  62. megadetector/data_management/coco_to_yolo.py +662 -0
  63. megadetector/data_management/databases/__init__.py +0 -0
  64. megadetector/data_management/databases/add_width_and_height_to_db.py +33 -0
  65. megadetector/data_management/databases/combine_coco_camera_traps_files.py +206 -0
  66. megadetector/data_management/databases/integrity_check_json_db.py +477 -0
  67. megadetector/data_management/databases/subset_json_db.py +115 -0
  68. megadetector/data_management/generate_crops_from_cct.py +149 -0
  69. megadetector/data_management/get_image_sizes.py +189 -0
  70. megadetector/data_management/importers/add_nacti_sizes.py +52 -0
  71. megadetector/data_management/importers/add_timestamps_to_icct.py +79 -0
  72. megadetector/data_management/importers/animl_results_to_md_results.py +158 -0
  73. megadetector/data_management/importers/auckland_doc_test_to_json.py +373 -0
  74. megadetector/data_management/importers/auckland_doc_to_json.py +201 -0
  75. megadetector/data_management/importers/awc_to_json.py +191 -0
  76. megadetector/data_management/importers/bellevue_to_json.py +273 -0
  77. megadetector/data_management/importers/cacophony-thermal-importer.py +796 -0
  78. megadetector/data_management/importers/carrizo_shrubfree_2018.py +269 -0
  79. megadetector/data_management/importers/carrizo_trail_cam_2017.py +289 -0
  80. megadetector/data_management/importers/cct_field_adjustments.py +58 -0
  81. megadetector/data_management/importers/channel_islands_to_cct.py +913 -0
  82. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +180 -0
  83. megadetector/data_management/importers/eMammal/eMammal_helpers.py +249 -0
  84. megadetector/data_management/importers/eMammal/make_eMammal_json.py +223 -0
  85. megadetector/data_management/importers/ena24_to_json.py +276 -0
  86. megadetector/data_management/importers/filenames_to_json.py +386 -0
  87. megadetector/data_management/importers/helena_to_cct.py +283 -0
  88. megadetector/data_management/importers/idaho-camera-traps.py +1407 -0
  89. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +294 -0
  90. megadetector/data_management/importers/jb_csv_to_json.py +150 -0
  91. megadetector/data_management/importers/mcgill_to_json.py +250 -0
  92. megadetector/data_management/importers/missouri_to_json.py +490 -0
  93. megadetector/data_management/importers/nacti_fieldname_adjustments.py +79 -0
  94. megadetector/data_management/importers/noaa_seals_2019.py +181 -0
  95. megadetector/data_management/importers/pc_to_json.py +365 -0
  96. megadetector/data_management/importers/plot_wni_giraffes.py +123 -0
  97. megadetector/data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -0
  98. megadetector/data_management/importers/prepare_zsl_imerit.py +131 -0
  99. megadetector/data_management/importers/rspb_to_json.py +356 -0
  100. megadetector/data_management/importers/save_the_elephants_survey_A.py +320 -0
  101. megadetector/data_management/importers/save_the_elephants_survey_B.py +329 -0
  102. megadetector/data_management/importers/snapshot_safari_importer.py +758 -0
  103. megadetector/data_management/importers/snapshot_safari_importer_reprise.py +665 -0
  104. megadetector/data_management/importers/snapshot_serengeti_lila.py +1067 -0
  105. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +150 -0
  106. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +153 -0
  107. megadetector/data_management/importers/sulross_get_exif.py +65 -0
  108. megadetector/data_management/importers/timelapse_csv_set_to_json.py +490 -0
  109. megadetector/data_management/importers/ubc_to_json.py +399 -0
  110. megadetector/data_management/importers/umn_to_json.py +507 -0
  111. megadetector/data_management/importers/wellington_to_json.py +263 -0
  112. megadetector/data_management/importers/wi_to_json.py +442 -0
  113. megadetector/data_management/importers/zamba_results_to_md_results.py +181 -0
  114. megadetector/data_management/labelme_to_coco.py +547 -0
  115. megadetector/data_management/labelme_to_yolo.py +272 -0
  116. megadetector/data_management/lila/__init__.py +0 -0
  117. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +97 -0
  118. megadetector/data_management/lila/add_locations_to_nacti.py +147 -0
  119. megadetector/data_management/lila/create_lila_blank_set.py +558 -0
  120. megadetector/data_management/lila/create_lila_test_set.py +152 -0
  121. megadetector/data_management/lila/create_links_to_md_results_files.py +106 -0
  122. megadetector/data_management/lila/download_lila_subset.py +178 -0
  123. megadetector/data_management/lila/generate_lila_per_image_labels.py +516 -0
  124. megadetector/data_management/lila/get_lila_annotation_counts.py +170 -0
  125. megadetector/data_management/lila/get_lila_image_counts.py +112 -0
  126. megadetector/data_management/lila/lila_common.py +300 -0
  127. megadetector/data_management/lila/test_lila_metadata_urls.py +132 -0
  128. megadetector/data_management/ocr_tools.py +874 -0
  129. megadetector/data_management/read_exif.py +681 -0
  130. megadetector/data_management/remap_coco_categories.py +84 -0
  131. megadetector/data_management/remove_exif.py +66 -0
  132. megadetector/data_management/resize_coco_dataset.py +189 -0
  133. megadetector/data_management/wi_download_csv_to_coco.py +246 -0
  134. megadetector/data_management/yolo_output_to_md_output.py +441 -0
  135. megadetector/data_management/yolo_to_coco.py +676 -0
  136. megadetector/detection/__init__.py +0 -0
  137. megadetector/detection/detector_training/__init__.py +0 -0
  138. megadetector/detection/detector_training/model_main_tf2.py +114 -0
  139. megadetector/detection/process_video.py +702 -0
  140. megadetector/detection/pytorch_detector.py +341 -0
  141. megadetector/detection/run_detector.py +779 -0
  142. megadetector/detection/run_detector_batch.py +1219 -0
  143. megadetector/detection/run_inference_with_yolov5_val.py +917 -0
  144. megadetector/detection/run_tiled_inference.py +934 -0
  145. megadetector/detection/tf_detector.py +189 -0
  146. megadetector/detection/video_utils.py +606 -0
  147. megadetector/postprocessing/__init__.py +0 -0
  148. megadetector/postprocessing/add_max_conf.py +64 -0
  149. megadetector/postprocessing/categorize_detections_by_size.py +163 -0
  150. megadetector/postprocessing/combine_api_outputs.py +249 -0
  151. megadetector/postprocessing/compare_batch_results.py +958 -0
  152. megadetector/postprocessing/convert_output_format.py +396 -0
  153. megadetector/postprocessing/load_api_results.py +195 -0
  154. megadetector/postprocessing/md_to_coco.py +310 -0
  155. megadetector/postprocessing/md_to_labelme.py +330 -0
  156. megadetector/postprocessing/merge_detections.py +401 -0
  157. megadetector/postprocessing/postprocess_batch_results.py +1902 -0
  158. megadetector/postprocessing/remap_detection_categories.py +170 -0
  159. megadetector/postprocessing/render_detection_confusion_matrix.py +660 -0
  160. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +211 -0
  161. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +83 -0
  162. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +1631 -0
  163. megadetector/postprocessing/separate_detections_into_folders.py +730 -0
  164. megadetector/postprocessing/subset_json_detector_output.py +696 -0
  165. megadetector/postprocessing/top_folders_to_bottom.py +223 -0
  166. megadetector/taxonomy_mapping/__init__.py +0 -0
  167. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +491 -0
  168. megadetector/taxonomy_mapping/map_new_lila_datasets.py +150 -0
  169. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -0
  170. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +590 -0
  171. megadetector/taxonomy_mapping/retrieve_sample_image.py +71 -0
  172. megadetector/taxonomy_mapping/simple_image_download.py +219 -0
  173. megadetector/taxonomy_mapping/species_lookup.py +834 -0
  174. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +159 -0
  175. megadetector/taxonomy_mapping/taxonomy_graph.py +346 -0
  176. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +83 -0
  177. megadetector/utils/__init__.py +0 -0
  178. megadetector/utils/azure_utils.py +178 -0
  179. megadetector/utils/ct_utils.py +612 -0
  180. megadetector/utils/directory_listing.py +246 -0
  181. megadetector/utils/md_tests.py +968 -0
  182. megadetector/utils/path_utils.py +1044 -0
  183. megadetector/utils/process_utils.py +157 -0
  184. megadetector/utils/sas_blob_utils.py +509 -0
  185. megadetector/utils/split_locations_into_train_val.py +228 -0
  186. megadetector/utils/string_utils.py +92 -0
  187. megadetector/utils/url_utils.py +323 -0
  188. megadetector/utils/write_html_image_list.py +225 -0
  189. megadetector/visualization/__init__.py +0 -0
  190. megadetector/visualization/plot_utils.py +293 -0
  191. megadetector/visualization/render_images_with_thumbnails.py +275 -0
  192. megadetector/visualization/visualization_utils.py +1536 -0
  193. megadetector/visualization/visualize_db.py +550 -0
  194. megadetector/visualization/visualize_detector_output.py +405 -0
  195. {megadetector-5.0.11.dist-info → megadetector-5.0.12.dist-info}/METADATA +1 -1
  196. megadetector-5.0.12.dist-info/RECORD +199 -0
  197. megadetector-5.0.12.dist-info/top_level.txt +1 -0
  198. megadetector-5.0.11.dist-info/RECORD +0 -5
  199. megadetector-5.0.11.dist-info/top_level.txt +0 -1
  200. {megadetector-5.0.11.dist-info → megadetector-5.0.12.dist-info}/LICENSE +0 -0
  201. {megadetector-5.0.11.dist-info → megadetector-5.0.12.dist-info}/WHEEL +0 -0
@@ -0,0 +1,676 @@
1
+ """
2
+
3
+ yolo_to_coco.py
4
+
5
+ Converts a folder of YOLO-formatted annotation files to a COCO-formatted dataset.
6
+
7
+ """
8
+
9
+ #%% Imports and constants
10
+
11
+ import json
12
+ import os
13
+
14
+ from multiprocessing.pool import ThreadPool
15
+ from multiprocessing.pool import Pool
16
+ from functools import partial
17
+
18
+ from tqdm import tqdm
19
+
20
+ from megadetector.utils.path_utils import find_images
21
+ from megadetector.utils.path_utils import recursive_file_list
22
+ from megadetector.utils.path_utils import find_image_strings
23
+ from megadetector.utils.ct_utils import invert_dictionary
24
+ from megadetector.visualization.visualization_utils import open_image
25
+ from megadetector.data_management.yolo_output_to_md_output import read_classes_from_yolo_dataset_file
26
+
27
+
28
+ #%% Support functions
29
+
30
def _filename_to_image_id(fn):
    """
    Derives an image ID from a (relative) filename.  Image IDs can't have spaces
    in them, so every space is replaced with an underscore.

    Args:
        fn (str): the filename to convert

    Returns:
        str: [fn] with each space replaced by an underscore
    """

    # Splitting on a literal space keeps empty pieces, so runs of spaces become
    # runs of underscores
    pieces = fn.split(' ')
    return '_'.join(pieces)
35
+
36
+
37
def _process_image(fn_abs,input_folder,category_id_to_name):
    """
    Internal support function for processing one image's labels.

    Args:
        fn_abs (str): absolute path to the image file
        input_folder (str): the dataset base folder; the image's path relative to this
            folder becomes its 'file_name', and (with spaces replaced) its 'id'
        category_id_to_name (dict): dict mapping integer category IDs to names, used to
            verify the category IDs that appear in the label file

    Returns:
        tuple: (im, annotations), where [im] is a dict with keys 'file_name', 'id',
        'width', 'height', and 'error', and [annotations] is a list of COCO-style
        annotation dicts.  If the image can't be opened, width and height are -1,
        'error' is the exception string, and the annotation list is empty.

    Raises:
        AssertionError: if a label line doesn't have exactly five tokens, or uses a
            category ID that is not in [category_id_to_name]
    """

    # Create the image object for this image
    #
    # Uniqueness of image IDs is verified by the caller in a separate loop, not here.
    fn_relative = os.path.relpath(fn_abs,input_folder)
    image_id = _filename_to_image_id(fn_relative)

    im = {}
    im['file_name'] = fn_relative
    im['id'] = image_id

    annotations_this_image = []

    try:
        pil_im = open_image(fn_abs)
        im_width, im_height = pil_im.size
        im['width'] = im_width
        im['height'] = im_height
        im['error'] = None
    except Exception as e:
        print('Warning: error reading {}:\n{}'.format(fn_relative,str(e)))
        im['width'] = -1
        im['height'] = -1
        im['error'] = str(e)
        return (im,annotations_this_image)

    # Is there an annotation file for this image?  Try the lowercase extension first,
    # then fall back to uppercase.
    fn_abs_without_extension = os.path.splitext(fn_abs)[0]
    annotation_file = fn_abs_without_extension + '.txt'
    if not os.path.isfile(annotation_file):
        annotation_file = fn_abs_without_extension + '.TXT'

    # No label file: the image is returned with no annotations
    if not os.path.isfile(annotation_file):
        return (im,annotations_this_image)

    with open(annotation_file,'r') as f:
        lines = [s.strip() for s in f.readlines()]

    annotation_number = 0

    for s in lines:

        # Skip blank lines and comment lines, consistent with validate_label_file()
        if len(s) == 0 or s[0] == '#':
            continue

        tokens = s.split()
        assert len(tokens) == 5, \
            'Expected 5 tokens, found {} in annotation file {}'.format(
                len(tokens),annotation_file)
        category_id = int(tokens[0])
        assert category_id in category_id_to_name, \
            'Unrecognized category ID {} in annotation file {}'.format(
                category_id,annotation_file)

        ann = {}
        ann['id'] = im['id'] + '_' + str(annotation_number)
        ann['image_id'] = im['id']
        ann['category_id'] = category_id
        ann['sequence_level_annotation'] = False

        # COCO: [x_min, y_min, width, height] in absolute coordinates
        # YOLO: [class, x_center, y_center, width, height] in normalized coordinates

        yolo_bbox = [float(x) for x in tokens[1:]]

        normalized_x_center = yolo_bbox[0]
        normalized_y_center = yolo_bbox[1]
        normalized_width = yolo_bbox[2]
        normalized_height = yolo_bbox[3]

        absolute_x_center = normalized_x_center * im_width
        absolute_y_center = normalized_y_center * im_height
        absolute_width = normalized_width * im_width
        absolute_height = normalized_height * im_height
        absolute_x_min = absolute_x_center - absolute_width / 2
        absolute_y_min = absolute_y_center - absolute_height / 2

        ann['bbox'] = [absolute_x_min, absolute_y_min, absolute_width, absolute_height]
        annotation_number += 1

        annotations_this_image.append(ann)

    # ...for each annotation

    return (im,annotations_this_image)
132
+
133
+ # ...def _process_image(...)
134
+
135
+
136
def load_yolo_class_list(class_name_file):
    """
    Loads a dictionary mapping zero-indexed IDs to class names from the text/yaml file
    [class_name_file].

    Args:
        class_name_file (str or list): this can be:
            - a .yml or .yaml file in YOLO's dataset.yaml format
            - a .txt or .data file containing a flat list of class names, one per line;
              blank lines are allowed only at the end of the file
            - a list of class names

    Returns:
        dict: A dict mapping zero-indexed integer IDs to class names

    Raises:
        AssertionError: if the file extension is not recognized, or if a flat text
            file is empty or starts with a blank line
        ValueError: if a flat text file has a non-blank line after a blank line
    """

    # class_name_file can also be a list of class names
    if isinstance(class_name_file,list):
        return dict(enumerate(class_name_file))

    ext = os.path.splitext(class_name_file)[1][1:]
    assert ext in ('yml','txt','yaml','data'), 'Unrecognized class name file type {}'.format(
        class_name_file)

    if ext in ('yml','yaml'):
        return read_classes_from_yolo_dataset_file(class_name_file)

    with open(class_name_file,'r') as f:
        lines = f.readlines()
    assert len(lines) > 0, 'Empty class name file {}'.format(class_name_file)

    # All blank-line checks have to operate on the stripped names: the raw lines
    # returned by readlines() still carry their newline, so they are never empty.
    class_names = [s.strip() for s in lines]
    assert len(class_names[0]) > 0, \
        'Empty class name file {} (empty first line)'.format(class_name_file)

    # Blank lines should only appear at the end
    b_found_blank = False
    for s in class_names:
        if len(s) == 0:
            b_found_blank = True
        elif b_found_blank:
            raise ValueError('Invalid class name file {}, non-blank line after the last blank line'.format(
                class_name_file))

    # Trailing blank lines are legal, but they are not class names
    class_names = [s for s in class_names if len(s) > 0]

    return dict(enumerate(class_names))
190
+
191
+ # ...load_yolo_class_list(...)
192
+
193
+
194
def validate_label_file(label_file,category_id_to_name=None,verbose=False):
    """
    Verifies that [label_file] is a valid YOLO label file.  Does not check the extension.

    Blank lines and lines starting with '#' are ignored.  Every other line must have
    exactly five whitespace-separated tokens: an integer class ID followed by four
    floats (normalized x_center, y_center, width, height).  Boxes that extend outside
    the [0,1] range are reported as errors.

    Args:
        label_file (str): the .txt file to validate
        category_id_to_name (dict, optional): a dict mapping integer category IDs to names;
            if this is not None, this function reports an error if the file uses a
            category that's not in this dict
        verbose (bool, optional): enable additional debug console output

    Returns:
        dict: a dict with keys 'file' (the same as [label_file]) and 'errors' (a list of
        errors (if any) that we found in this file).  Line numbers in token error
        messages are zero-based.
    """

    label_result = {}
    label_result['file'] = label_file
    label_result['errors'] = []

    try:
        with open(label_file,'r') as f:
            lines = f.readlines()
    except Exception as e:
        label_result['errors'].append('Read error: {}'.format(str(e)))
        return label_result

    for i_line,line in enumerate(lines):

        s = line.strip()
        if len(s) == 0 or s[0] == '#':
            continue

        try:

            tokens = s.split()

            # Explicit raises rather than asserts, so validation still happens when
            # Python runs with -O (which strips assert statements)
            if len(tokens) != 5:
                raise ValueError('{} tokens'.format(len(tokens)))

            # The class ID has to be an integer even when we have no category list
            # to check it against
            category_id = int(tokens[0])
            if (category_id_to_name is not None) and \
               (category_id not in category_id_to_name):
                raise ValueError('Unrecognized category ID {}'.format(category_id))

            yolo_bbox = [float(x) for x in tokens[1:]]

        except Exception as e:
            label_result['errors'].append('Token error at line {}: {}'.format(i_line,str(e)))
            continue

        normalized_x_center = yolo_bbox[0]
        normalized_y_center = yolo_bbox[1]
        normalized_width = yolo_bbox[2]
        normalized_height = yolo_bbox[3]

        normalized_x_min = normalized_x_center - normalized_width / 2.0
        normalized_x_max = normalized_x_center + normalized_width / 2.0
        normalized_y_min = normalized_y_center - normalized_height / 2.0
        normalized_y_max = normalized_y_center + normalized_height / 2.0

        if normalized_x_min < 0 or normalized_y_min < 0 or \
           normalized_x_max > 1 or normalized_y_max > 1:
            label_result['errors'].append('Invalid bounding box: {} {} {} {}'.format(
                normalized_x_min,normalized_y_min,normalized_x_max,normalized_y_max))

    # ...for each line

    if verbose and len(label_result['errors']) > 0:
        print('Errors for {}:'.format(label_file))
        for error in label_result['errors']:
            print(error)

    return label_result
267
+
268
+ # ...def validate_label_file(...)
269
+
270
+
271
def validate_yolo_dataset(input_folder, class_name_file, n_workers=1, pool_type='thread', verbose=False):
    """
    Verifies all the labels in a YOLO dataset folder.

    Looks for:

    * Image files without label files
    * Text files without image files
    * Illegal classes in label files
    * Invalid boxes in label files

    Args:
        input_folder (str): the YOLO dataset folder to validate
        class_name_file (str or list): a list of classes, a flat text file, or a yolo
            dataset.yml/.yaml file.  If it's a dataset.yml file, that file should point to
            input_folder as the base folder, though this is not explicitly checked.
        n_workers (int, optional): number of concurrent workers, set to <= 1 to disable
            parallelization
        pool_type (str, optional): 'thread' or 'process', worker type to use for parallelization;
            not used if [n_workers] <= 1
        verbose (bool, optional): enable additional debug console output

    Returns:
        dict: validation results, as a dict with fields:

        - image_files_without_label_files (list)
        - label_files_without_images (list)
        - label_results (list of dicts with fields 'file', 'errors', one per label file)
    """

    # Validate arguments
    assert os.path.isdir(input_folder), 'Could not find input folder {}'.format(input_folder)
    if n_workers > 1:
        assert pool_type in ('thread','process'), 'Illegal pool type {}'.format(pool_type)

    category_id_to_name = load_yolo_class_list(class_name_file)

    print('Enumerating files in {}'.format(input_folder))

    all_files = recursive_file_list(input_folder,recursive=True,return_relative_paths=False,
                                    convert_slashes=True)
    label_files = [fn for fn in all_files if fn.endswith('.txt')]
    image_files = find_image_strings(all_files)
    print('Found {} images files and {} label files in {}'.format(
        len(image_files),len(label_files),input_folder))

    label_files_set = set(label_files)

    # Image/label matching is done on the path without its extension, so two images
    # that differ only by extension would be ambiguous
    image_files_without_extension = set()
    for fn in image_files:
        image_file_without_extension = os.path.splitext(fn)[0]
        assert image_file_without_extension not in image_files_without_extension, \
            'Duplicate image file, likely with different extensions: {}'.format(fn)
        image_files_without_extension.add(image_file_without_extension)

    print('Looking for missing image/label files')

    image_files_without_label_files = []
    label_files_without_images = []

    for image_file in tqdm(image_files):
        expected_label_file = os.path.splitext(image_file)[0] + '.txt'
        if expected_label_file not in label_files_set:
            image_files_without_label_files.append(image_file)

    for label_file in tqdm(label_files):
        expected_image_file_without_extension = os.path.splitext(label_file)[0]
        if expected_image_file_without_extension not in image_files_without_extension:
            label_files_without_images.append(label_file)

    print('Found {} image files without labels, {} labels without images'.format(
        len(image_files_without_label_files),len(label_files_without_images)))

    print('Validating label files')

    if n_workers <= 1:

        label_results = []
        for fn_abs in tqdm(label_files):
            label_results.append(validate_label_file(fn_abs,
                                                     category_id_to_name=category_id_to_name,
                                                     verbose=verbose))

    else:

        # pool_type was already validated above for n_workers > 1
        if pool_type == 'thread':
            pool = ThreadPool(n_workers)
        else:
            pool = Pool(n_workers)

        print('Starting a {} pool of {} workers'.format(pool_type,n_workers))

        try:
            p = partial(validate_label_file,
                        category_id_to_name=category_id_to_name,
                        verbose=verbose)
            label_results = list(tqdm(pool.imap(p, label_files),
                                      total=len(label_files)))
        finally:
            # Release the workers whether or not validation succeeded
            pool.close()
            pool.join()

    assert len(label_results) == len(label_files)

    validation_results = {}
    validation_results['image_files_without_label_files'] = image_files_without_label_files
    validation_results['label_files_without_images'] = label_files_without_images
    validation_results['label_results'] = label_results

    return validation_results
379
+
380
+ # ...validate_yolo_dataset(...)
381
+
382
+
383
+ #%% Main conversion function
384
+
385
+ def yolo_to_coco(input_folder,
386
+ class_name_file,
387
+ output_file=None,
388
+ empty_image_handling='no_annotations',
389
+ empty_image_category_name='empty',
390
+ error_image_handling='no_annotations',
391
+ allow_images_without_label_files=True,
392
+ n_workers=1,
393
+ pool_type='thread',
394
+ recursive=True,
395
+ exclude_string=None,
396
+ include_string=None):
397
+ """
398
+ Converts a YOLO-formatted dataset to a COCO-formatted dataset.
399
+
400
+ All images will be assigned an "error" value, usually None.
401
+
402
+ Args:
403
+ input_folder (str): the YOLO dataset folder to validate
404
+ class_name_file (str or list): a list of classes, a flat text file, or a yolo
405
+ dataset.yml/.yaml file. If it's a dataset.yml file, that file should point to
406
+ input_folder as the base folder, though this is not explicitly checked.
407
+ output_file (str, optional): .json file to which we should write COCO .json data
408
+ empty_image_handling (str, optional): how to handle images with no boxes; whether
409
+ this includes images with no .txt files depending on the value of
410
+ [allow_images_without_label_files]. Can be:
411
+
412
+ - 'no_annotations': include the image in the image list, with no annotations
413
+ - 'empty_annotations': include the image in the image list, and add an annotation without
414
+ any bounding boxes, using a category called [empty_image_category_name].
415
+ - 'skip': don't include the image in the image list
416
+ - 'error': there shouldn't be any empty images
417
+ error_image_handling (str, optional): how to handle images that don't load properly; can
418
+ be:
419
+
420
+ - 'skip': don't include the image at all
421
+ - 'no_annotations': include with no annotations
422
+
423
+ n_workers (int, optional): number of concurrent workers, set to <= 1 to disable
424
+ parallelization
425
+ pool_type (str, optional): 'thread' or 'process', worker type to use for parallelization;
426
+ not used if [n_workers] <= 1
427
+ recursive (bool, optional): whether to recurse into [input_folder]
428
+ exclude_string (str, optional): exclude any images whose filename contains a string
429
+ include_string (str, optional): include only images whose filename contains a string
430
+
431
+ Returns:
432
+ dict: COCO-formatted data, the same as what's written to [output_file]
433
+ """
434
+
435
+ ## Validate input
436
+
437
+ assert os.path.isdir(input_folder)
438
+ assert os.path.isfile(class_name_file)
439
+
440
+ assert empty_image_handling in \
441
+ ('no_annotations','empty_annotations','skip','error'), \
442
+ 'Unrecognized empty image handling spec: {}'.format(empty_image_handling)
443
+
444
+
445
+ ## Read class names
446
+
447
+ category_id_to_name = load_yolo_class_list(class_name_file)
448
+
449
+
450
+ # Find or create the empty image category, if necessary
451
+ empty_category_id = None
452
+
453
+ if (empty_image_handling == 'empty_annotations'):
454
+ category_name_to_id = invert_dictionary(category_id_to_name)
455
+ if empty_image_category_name in category_name_to_id:
456
+ empty_category_id = category_name_to_id[empty_image_category_name]
457
+ print('Using existing empty image category with name {}, ID {}'.format(
458
+ empty_image_category_name,empty_category_id))
459
+ else:
460
+ empty_category_id = len(category_id_to_name)
461
+ print('Adding an empty category with name {}, ID {}'.format(
462
+ empty_image_category_name,empty_category_id))
463
+ category_id_to_name[empty_category_id] = empty_image_category_name
464
+
465
+
466
+ ## Enumerate images
467
+
468
+ print('Enumerating images...')
469
+
470
+ image_files_abs = find_images(input_folder,recursive=recursive,convert_slashes=True)
471
+
472
+ n_files_original = len(image_files_abs)
473
+
474
+ # Optionally include/exclude images matching specific strings
475
+ if exclude_string is not None:
476
+ image_files_abs = [fn for fn in image_files_abs if exclude_string not in fn]
477
+ if include_string is not None:
478
+ image_files_abs = [fn for fn in image_files_abs if include_string in fn]
479
+
480
+ if len(image_files_abs) != n_files_original or exclude_string is not None or include_string is not None:
481
+ n_excluded = n_files_original - len(image_files_abs)
482
+ print('Excluded {} of {} images based on filenames'.format(n_excluded,n_files_original))
483
+
484
+ categories = []
485
+
486
+ for category_id in category_id_to_name:
487
+ categories.append({'id':category_id,'name':category_id_to_name[category_id]})
488
+
489
+ info = {}
490
+ info['version'] = '1.0'
491
+ info['description'] = 'Converted from YOLO format'
492
+
493
+ image_ids = set()
494
+
495
+
496
+ ## If we're expected to have labels for every image, check before we process all the images
497
+
498
+ if not allow_images_without_label_files:
499
+ print('Verifying that label files exist')
500
+ for image_file_abs in tqdm(image_files_abs):
501
+ label_file_abs = os.path.splitext(image_file_abs)[0] + '.txt'
502
+ assert os.path.isfile(label_file_abs), \
503
+ 'No annotation file for {}'.format(image_file_abs)
504
+
505
+
506
+ ## Initial loop to make sure image IDs will be unique
507
+
508
+ print('Validating image IDs...')
509
+
510
+ for fn_abs in tqdm(image_files_abs):
511
+
512
+ fn_relative = os.path.relpath(fn_abs,input_folder)
513
+ image_id = _filename_to_image_id(fn_relative)
514
+ assert image_id not in image_ids, \
515
+ 'Oops, you have hit a very esoteric case where you have the same filename ' + \
516
+ 'with both spaces and underscores, this is not currently handled.'
517
+ image_ids.add(image_id)
518
+
519
+
520
+ ## Main loop to process labels
521
+
522
+ print('Processing labels...')
523
+
524
+ if n_workers <= 1:
525
+
526
+ image_results = []
527
+ for fn_abs in tqdm(image_files_abs):
528
+ image_results.append(_process_image(fn_abs,input_folder,category_id_to_name))
529
+
530
+ else:
531
+
532
+ assert pool_type in ('process','thread'), 'Illegal pool type {}'.format(pool_type)
533
+
534
+ if pool_type == 'thread':
535
+ pool = ThreadPool(n_workers)
536
+ else:
537
+ pool = Pool(n_workers)
538
+
539
+ print('Starting a {} pool of {} workers'.format(pool_type,n_workers))
540
+
541
+ p = partial(_process_image,input_folder=input_folder,
542
+ category_id_to_name=category_id_to_name)
543
+ image_results = list(tqdm(pool.imap(p, image_files_abs),
544
+ total=len(image_files_abs)))
545
+
546
+
547
+ assert len(image_results) == len(image_files_abs)
548
+
549
+
550
+ ## Re-assembly of results into a COCO dict
551
+
552
+ print('Assembling labels...')
553
+
554
+ images = []
555
+ annotations = []
556
+
557
+ for image_result in tqdm(image_results):
558
+
559
+ im = image_result[0]
560
+ annotations_this_image = image_result[1]
561
+
562
+ # If we have annotations for this image
563
+ if len(annotations_this_image) > 0:
564
+ assert im['error'] is None
565
+ images.append(im)
566
+ for ann in annotations_this_image:
567
+ annotations.append(ann)
568
+
569
+ # If this image failed to read
570
+ elif im['error'] is not None:
571
+
572
+ if error_image_handling == 'skip':
573
+ pass
574
+ elif error_image_handling == 'no_annotations':
575
+ images.append(im)
576
+
577
+ # If this image read successfully, but there are no annotations
578
+ else:
579
+
580
+ if empty_image_handling == 'skip':
581
+ pass
582
+ elif empty_image_handling == 'no_annotations':
583
+ images.append(im)
584
+ elif empty_image_handling == 'empty_annotations':
585
+ assert empty_category_id is not None
586
+ ann = {}
587
+ ann['id'] = im['id'] + '_0'
588
+ ann['image_id'] = im['id']
589
+ ann['category_id'] = empty_category_id
590
+ ann['sequence_level_annotation'] = False
591
+ # This would also be a reasonable thing to do, but it's not the convention
592
+ # we're adopting.
593
+ # ann['bbox'] = [0,0,0,0]
594
+ annotations.append(ann)
595
+ images.append(im)
596
+
597
+ # ...for each image result
598
+
599
+ print('Read {} annotations for {} images'.format(len(annotations),
600
+ len(images)))
601
+
602
+ d = {}
603
+ d['images'] = images
604
+ d['annotations'] = annotations
605
+ d['categories'] = categories
606
+ d['info'] = info
607
+
608
+ if output_file is not None:
609
+ print('Writing to {}'.format(output_file))
610
+ with open(output_file,'w') as f:
611
+ json.dump(d,f,indent=1)
612
+
613
+ return d
614
+
615
+ # ...def yolo_to_coco()
616
+
617
+
618
+ #%% Interactive driver
619
+
620
if False:

    # Scratch cells for interactive use; nothing in this block runs on import
    # because the guard above is always False.
    pass

    #%% Convert YOLO folders to COCO

    preview_folder = '/home/user/data/noaa-fish/val-coco-conversion-preview'

    # Both splits are converted against the same class list.
    class_name_file = '/home/user/data/noaa-fish/AllImagesWithAnnotations/classes.txt'

    # (image folder, output .json) pairs, converted in order.  After the loop,
    # input_folder / output_file / d hold the values for the last pair (train),
    # which is what the cells below pick up.
    splits_to_convert = (
        ('/home/user/data/noaa-fish/val',
         '/home/user/data/noaa-fish/val.json'),
        ('/home/user/data/noaa-fish/train',
         '/home/user/data/noaa-fish/train.json'),
    )

    for input_folder, output_file in splits_to_convert:
        d = yolo_to_coco(input_folder,class_name_file,output_file)


    #%% Check DB integrity

    from megadetector.data_management.databases import integrity_check_json_db

    # Checks the most recently written .json against the folder it came from.
    integrity_options = integrity_check_json_db.IntegrityCheckOptions()
    integrity_options.baseDir = input_folder
    integrity_options.bCheckImageExistence = True
    integrity_options.bCheckImageSizes = False
    integrity_options.bFindUnusedImages = True

    # The three return values are not used here.
    _, _, _ = integrity_check_json_db.integrity_check_json_db(output_file, integrity_options)


    #%% Preview some images

    from megadetector.visualization import visualize_db

    # NOTE(review): preview_folder is named for the val split, but when the cells
    # are run top to bottom, input_folder/output_file hold the train values by
    # this point - confirm this is intended.
    viz_options = visualize_db.DbVizOptions()
    viz_options.num_to_visualize = None
    viz_options.trim_to_images_with_bboxes = False
    viz_options.add_search_links = False
    viz_options.sort_by_filename = False
    viz_options.parallelize_rendering = True
    viz_options.include_filename_links = True

    html_output_file, _ = visualize_db.visualize_db(
        db_path=output_file,
        output_dir=preview_folder,
        image_base_dir=input_folder,
        options=viz_options)

    # Open the generated HTML preview.
    from megadetector.utils.path_utils import open_file
    open_file(html_output_file)
672
+
673
+
674
+ #%% Command-line driver
675
+
676
+ # TODO: add a command-line entry point for yolo_to_coco()
File without changes
File without changes