megadetector 5.0.9__py3-none-any.whl → 5.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (226) hide show
  1. {megadetector-5.0.9.dist-info → megadetector-5.0.11.dist-info}/LICENSE +0 -0
  2. {megadetector-5.0.9.dist-info → megadetector-5.0.11.dist-info}/METADATA +12 -11
  3. megadetector-5.0.11.dist-info/RECORD +5 -0
  4. megadetector-5.0.11.dist-info/top_level.txt +1 -0
  5. api/__init__.py +0 -0
  6. api/batch_processing/__init__.py +0 -0
  7. api/batch_processing/api_core/__init__.py +0 -0
  8. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  9. api/batch_processing/api_core/batch_service/score.py +0 -439
  10. api/batch_processing/api_core/server.py +0 -294
  11. api/batch_processing/api_core/server_api_config.py +0 -98
  12. api/batch_processing/api_core/server_app_config.py +0 -55
  13. api/batch_processing/api_core/server_batch_job_manager.py +0 -220
  14. api/batch_processing/api_core/server_job_status_table.py +0 -152
  15. api/batch_processing/api_core/server_orchestration.py +0 -360
  16. api/batch_processing/api_core/server_utils.py +0 -92
  17. api/batch_processing/api_core_support/__init__.py +0 -0
  18. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
  19. api/batch_processing/api_support/__init__.py +0 -0
  20. api/batch_processing/api_support/summarize_daily_activity.py +0 -152
  21. api/batch_processing/data_preparation/__init__.py +0 -0
  22. api/batch_processing/data_preparation/manage_local_batch.py +0 -2391
  23. api/batch_processing/data_preparation/manage_video_batch.py +0 -327
  24. api/batch_processing/integration/digiKam/setup.py +0 -6
  25. api/batch_processing/integration/digiKam/xmp_integration.py +0 -465
  26. api/batch_processing/integration/eMammal/test_scripts/config_template.py +0 -5
  27. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -126
  28. api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +0 -55
  29. api/batch_processing/postprocessing/__init__.py +0 -0
  30. api/batch_processing/postprocessing/add_max_conf.py +0 -64
  31. api/batch_processing/postprocessing/categorize_detections_by_size.py +0 -163
  32. api/batch_processing/postprocessing/combine_api_outputs.py +0 -249
  33. api/batch_processing/postprocessing/compare_batch_results.py +0 -958
  34. api/batch_processing/postprocessing/convert_output_format.py +0 -397
  35. api/batch_processing/postprocessing/load_api_results.py +0 -195
  36. api/batch_processing/postprocessing/md_to_coco.py +0 -310
  37. api/batch_processing/postprocessing/md_to_labelme.py +0 -330
  38. api/batch_processing/postprocessing/merge_detections.py +0 -401
  39. api/batch_processing/postprocessing/postprocess_batch_results.py +0 -1904
  40. api/batch_processing/postprocessing/remap_detection_categories.py +0 -170
  41. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +0 -661
  42. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +0 -211
  43. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +0 -82
  44. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +0 -1631
  45. api/batch_processing/postprocessing/separate_detections_into_folders.py +0 -731
  46. api/batch_processing/postprocessing/subset_json_detector_output.py +0 -696
  47. api/batch_processing/postprocessing/top_folders_to_bottom.py +0 -223
  48. api/synchronous/__init__.py +0 -0
  49. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  50. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -152
  51. api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -266
  52. api/synchronous/api_core/animal_detection_api/config.py +0 -35
  53. api/synchronous/api_core/animal_detection_api/data_management/annotations/annotation_constants.py +0 -47
  54. api/synchronous/api_core/animal_detection_api/detection/detector_training/copy_checkpoints.py +0 -43
  55. api/synchronous/api_core/animal_detection_api/detection/detector_training/model_main_tf2.py +0 -114
  56. api/synchronous/api_core/animal_detection_api/detection/process_video.py +0 -543
  57. api/synchronous/api_core/animal_detection_api/detection/pytorch_detector.py +0 -304
  58. api/synchronous/api_core/animal_detection_api/detection/run_detector.py +0 -627
  59. api/synchronous/api_core/animal_detection_api/detection/run_detector_batch.py +0 -1029
  60. api/synchronous/api_core/animal_detection_api/detection/run_inference_with_yolov5_val.py +0 -581
  61. api/synchronous/api_core/animal_detection_api/detection/run_tiled_inference.py +0 -754
  62. api/synchronous/api_core/animal_detection_api/detection/tf_detector.py +0 -165
  63. api/synchronous/api_core/animal_detection_api/detection/video_utils.py +0 -495
  64. api/synchronous/api_core/animal_detection_api/md_utils/azure_utils.py +0 -174
  65. api/synchronous/api_core/animal_detection_api/md_utils/ct_utils.py +0 -262
  66. api/synchronous/api_core/animal_detection_api/md_utils/directory_listing.py +0 -251
  67. api/synchronous/api_core/animal_detection_api/md_utils/matlab_porting_tools.py +0 -97
  68. api/synchronous/api_core/animal_detection_api/md_utils/path_utils.py +0 -416
  69. api/synchronous/api_core/animal_detection_api/md_utils/process_utils.py +0 -110
  70. api/synchronous/api_core/animal_detection_api/md_utils/sas_blob_utils.py +0 -509
  71. api/synchronous/api_core/animal_detection_api/md_utils/string_utils.py +0 -59
  72. api/synchronous/api_core/animal_detection_api/md_utils/url_utils.py +0 -144
  73. api/synchronous/api_core/animal_detection_api/md_utils/write_html_image_list.py +0 -226
  74. api/synchronous/api_core/animal_detection_api/md_visualization/visualization_utils.py +0 -841
  75. api/synchronous/api_core/tests/__init__.py +0 -0
  76. api/synchronous/api_core/tests/load_test.py +0 -110
  77. classification/__init__.py +0 -0
  78. classification/aggregate_classifier_probs.py +0 -108
  79. classification/analyze_failed_images.py +0 -227
  80. classification/cache_batchapi_outputs.py +0 -198
  81. classification/create_classification_dataset.py +0 -627
  82. classification/crop_detections.py +0 -516
  83. classification/csv_to_json.py +0 -226
  84. classification/detect_and_crop.py +0 -855
  85. classification/efficientnet/__init__.py +0 -9
  86. classification/efficientnet/model.py +0 -415
  87. classification/efficientnet/utils.py +0 -610
  88. classification/evaluate_model.py +0 -520
  89. classification/identify_mislabeled_candidates.py +0 -152
  90. classification/json_to_azcopy_list.py +0 -63
  91. classification/json_validator.py +0 -695
  92. classification/map_classification_categories.py +0 -276
  93. classification/merge_classification_detection_output.py +0 -506
  94. classification/prepare_classification_script.py +0 -194
  95. classification/prepare_classification_script_mc.py +0 -228
  96. classification/run_classifier.py +0 -286
  97. classification/save_mislabeled.py +0 -110
  98. classification/train_classifier.py +0 -825
  99. classification/train_classifier_tf.py +0 -724
  100. classification/train_utils.py +0 -322
  101. data_management/__init__.py +0 -0
  102. data_management/annotations/__init__.py +0 -0
  103. data_management/annotations/annotation_constants.py +0 -34
  104. data_management/camtrap_dp_to_coco.py +0 -238
  105. data_management/cct_json_utils.py +0 -395
  106. data_management/cct_to_md.py +0 -176
  107. data_management/cct_to_wi.py +0 -289
  108. data_management/coco_to_labelme.py +0 -272
  109. data_management/coco_to_yolo.py +0 -662
  110. data_management/databases/__init__.py +0 -0
  111. data_management/databases/add_width_and_height_to_db.py +0 -33
  112. data_management/databases/combine_coco_camera_traps_files.py +0 -206
  113. data_management/databases/integrity_check_json_db.py +0 -477
  114. data_management/databases/subset_json_db.py +0 -115
  115. data_management/generate_crops_from_cct.py +0 -149
  116. data_management/get_image_sizes.py +0 -188
  117. data_management/importers/add_nacti_sizes.py +0 -52
  118. data_management/importers/add_timestamps_to_icct.py +0 -79
  119. data_management/importers/animl_results_to_md_results.py +0 -158
  120. data_management/importers/auckland_doc_test_to_json.py +0 -372
  121. data_management/importers/auckland_doc_to_json.py +0 -200
  122. data_management/importers/awc_to_json.py +0 -189
  123. data_management/importers/bellevue_to_json.py +0 -273
  124. data_management/importers/cacophony-thermal-importer.py +0 -796
  125. data_management/importers/carrizo_shrubfree_2018.py +0 -268
  126. data_management/importers/carrizo_trail_cam_2017.py +0 -287
  127. data_management/importers/cct_field_adjustments.py +0 -57
  128. data_management/importers/channel_islands_to_cct.py +0 -913
  129. data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  130. data_management/importers/eMammal/eMammal_helpers.py +0 -249
  131. data_management/importers/eMammal/make_eMammal_json.py +0 -223
  132. data_management/importers/ena24_to_json.py +0 -275
  133. data_management/importers/filenames_to_json.py +0 -385
  134. data_management/importers/helena_to_cct.py +0 -282
  135. data_management/importers/idaho-camera-traps.py +0 -1407
  136. data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  137. data_management/importers/jb_csv_to_json.py +0 -150
  138. data_management/importers/mcgill_to_json.py +0 -250
  139. data_management/importers/missouri_to_json.py +0 -489
  140. data_management/importers/nacti_fieldname_adjustments.py +0 -79
  141. data_management/importers/noaa_seals_2019.py +0 -181
  142. data_management/importers/pc_to_json.py +0 -365
  143. data_management/importers/plot_wni_giraffes.py +0 -123
  144. data_management/importers/prepare-noaa-fish-data-for-lila.py +0 -359
  145. data_management/importers/prepare_zsl_imerit.py +0 -131
  146. data_management/importers/rspb_to_json.py +0 -356
  147. data_management/importers/save_the_elephants_survey_A.py +0 -320
  148. data_management/importers/save_the_elephants_survey_B.py +0 -332
  149. data_management/importers/snapshot_safari_importer.py +0 -758
  150. data_management/importers/snapshot_safari_importer_reprise.py +0 -665
  151. data_management/importers/snapshot_serengeti_lila.py +0 -1067
  152. data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  153. data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  154. data_management/importers/sulross_get_exif.py +0 -65
  155. data_management/importers/timelapse_csv_set_to_json.py +0 -490
  156. data_management/importers/ubc_to_json.py +0 -399
  157. data_management/importers/umn_to_json.py +0 -507
  158. data_management/importers/wellington_to_json.py +0 -263
  159. data_management/importers/wi_to_json.py +0 -441
  160. data_management/importers/zamba_results_to_md_results.py +0 -181
  161. data_management/labelme_to_coco.py +0 -548
  162. data_management/labelme_to_yolo.py +0 -272
  163. data_management/lila/__init__.py +0 -0
  164. data_management/lila/add_locations_to_island_camera_traps.py +0 -97
  165. data_management/lila/add_locations_to_nacti.py +0 -147
  166. data_management/lila/create_lila_blank_set.py +0 -557
  167. data_management/lila/create_lila_test_set.py +0 -151
  168. data_management/lila/create_links_to_md_results_files.py +0 -106
  169. data_management/lila/download_lila_subset.py +0 -177
  170. data_management/lila/generate_lila_per_image_labels.py +0 -515
  171. data_management/lila/get_lila_annotation_counts.py +0 -170
  172. data_management/lila/get_lila_image_counts.py +0 -111
  173. data_management/lila/lila_common.py +0 -300
  174. data_management/lila/test_lila_metadata_urls.py +0 -132
  175. data_management/ocr_tools.py +0 -874
  176. data_management/read_exif.py +0 -681
  177. data_management/remap_coco_categories.py +0 -84
  178. data_management/remove_exif.py +0 -66
  179. data_management/resize_coco_dataset.py +0 -189
  180. data_management/wi_download_csv_to_coco.py +0 -246
  181. data_management/yolo_output_to_md_output.py +0 -441
  182. data_management/yolo_to_coco.py +0 -676
  183. detection/__init__.py +0 -0
  184. detection/detector_training/__init__.py +0 -0
  185. detection/detector_training/model_main_tf2.py +0 -114
  186. detection/process_video.py +0 -703
  187. detection/pytorch_detector.py +0 -337
  188. detection/run_detector.py +0 -779
  189. detection/run_detector_batch.py +0 -1219
  190. detection/run_inference_with_yolov5_val.py +0 -917
  191. detection/run_tiled_inference.py +0 -935
  192. detection/tf_detector.py +0 -188
  193. detection/video_utils.py +0 -606
  194. docs/source/conf.py +0 -43
  195. md_utils/__init__.py +0 -0
  196. md_utils/azure_utils.py +0 -174
  197. md_utils/ct_utils.py +0 -612
  198. md_utils/directory_listing.py +0 -246
  199. md_utils/md_tests.py +0 -968
  200. md_utils/path_utils.py +0 -1044
  201. md_utils/process_utils.py +0 -157
  202. md_utils/sas_blob_utils.py +0 -509
  203. md_utils/split_locations_into_train_val.py +0 -228
  204. md_utils/string_utils.py +0 -92
  205. md_utils/url_utils.py +0 -323
  206. md_utils/write_html_image_list.py +0 -225
  207. md_visualization/__init__.py +0 -0
  208. md_visualization/plot_utils.py +0 -293
  209. md_visualization/render_images_with_thumbnails.py +0 -275
  210. md_visualization/visualization_utils.py +0 -1537
  211. md_visualization/visualize_db.py +0 -551
  212. md_visualization/visualize_detector_output.py +0 -406
  213. megadetector-5.0.9.dist-info/RECORD +0 -224
  214. megadetector-5.0.9.dist-info/top_level.txt +0 -8
  215. taxonomy_mapping/__init__.py +0 -0
  216. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +0 -491
  217. taxonomy_mapping/map_new_lila_datasets.py +0 -154
  218. taxonomy_mapping/prepare_lila_taxonomy_release.py +0 -142
  219. taxonomy_mapping/preview_lila_taxonomy.py +0 -591
  220. taxonomy_mapping/retrieve_sample_image.py +0 -71
  221. taxonomy_mapping/simple_image_download.py +0 -218
  222. taxonomy_mapping/species_lookup.py +0 -834
  223. taxonomy_mapping/taxonomy_csv_checker.py +0 -159
  224. taxonomy_mapping/taxonomy_graph.py +0 -346
  225. taxonomy_mapping/validate_lila_category_mappings.py +0 -83
  226. {megadetector-5.0.9.dist-info → megadetector-5.0.11.dist-info}/WHEEL +0 -0
@@ -1,676 +0,0 @@
1
- """
2
-
3
- yolo_to_coco.py
4
-
5
- Converts a folder of YOLO-formatted annotation files to a COCO-formatted dataset.
6
-
7
- """
8
-
9
- #%% Imports and constants
10
-
11
- import json
12
- import os
13
-
14
- from multiprocessing.pool import ThreadPool
15
- from multiprocessing.pool import Pool
16
- from functools import partial
17
-
18
- from tqdm import tqdm
19
-
20
- from md_utils.path_utils import find_images
21
- from md_utils.path_utils import recursive_file_list
22
- from md_utils.path_utils import find_image_strings
23
- from md_utils.ct_utils import invert_dictionary
24
- from md_visualization.visualization_utils import open_image
25
- from data_management.yolo_output_to_md_output import read_classes_from_yolo_dataset_file
26
-
27
-
28
- #%% Support functions
29
-
30
- def _filename_to_image_id(fn):
31
- """
32
- Image IDs can't have spaces in them, replae spaces with underscores
33
- """
34
- return fn.replace(' ','_')
35
-
36
-
37
def _process_image(fn_abs,input_folder,category_id_to_name):
    """
    Internal support function for processing one image's labels.

    Args:
        fn_abs (str): path to the image file
        input_folder (str): base folder; the image's 'file_name' and 'id' are derived
            from the path of [fn_abs] relative to this folder
        category_id_to_name (dict): dict whose keys are the valid integer category IDs;
            only used here to verify the category IDs found in the label file

    Returns:
        tuple: (im,annotations_this_image), where [im] is a dict with fields 'file_name',
        'id', 'width', 'height', and 'error', and [annotations_this_image] is a list of
        annotation dicts with fields 'id', 'image_id', 'category_id',
        'sequence_level_annotation', and 'bbox'.  If the image can't be opened, width and
        height are -1, 'error' is the exception string, and the annotation list is empty.
    """

    # Create the image object for this image
    fn_relative = os.path.relpath(fn_abs,input_folder)
    image_id = _filename_to_image_id(fn_relative)

    # Uniqueness of image IDs is not verified here

    im = {}
    im['file_name'] = fn_relative
    im['id'] = image_id

    annotations_this_image = []

    try:
        pil_im = open_image(fn_abs)
        im_width, im_height = pil_im.size
        im['width'] = im_width
        im['height'] = im_height
        im['error'] = None
    except Exception as e:
        print('Warning: error reading {}:\n{}'.format(fn_relative,str(e)))
        im['width'] = -1
        im['height'] = -1
        im['error'] = str(e)
        return (im,annotations_this_image)

    # Is there an annotation file for this image?
    annotation_file = os.path.splitext(fn_abs)[0] + '.txt'
    if not os.path.isfile(annotation_file):
        annotation_file = os.path.splitext(fn_abs)[0] + '.TXT'

    if os.path.isfile(annotation_file):

        with open(annotation_file,'r') as f:
            lines = f.readlines()
        lines = [s.strip() for s in lines]

        # s = lines[0]
        annotation_number = 0

        for s in lines:

            # Skip blank lines and comment lines; validate_label_file() accepts both,
            # so a file that passes validation should not fail here.
            if len(s) == 0 or s[0] == '#':
                continue

            tokens = s.split()
            assert len(tokens) == 5, \
                'Expected 5 tokens, found {} in annotation file {}'.format(
                    len(tokens),annotation_file)
            category_id = int(tokens[0])
            assert category_id in category_id_to_name, \
                'Unrecognized category ID {} in annotation file {}'.format(
                    category_id,annotation_file)
            ann = {}
            ann['id'] = im['id'] + '_' + str(annotation_number)
            ann['image_id'] = im['id']
            ann['category_id'] = category_id
            ann['sequence_level_annotation'] = False

            # COCO: [x_min, y_min, width, height] in absolute coordinates
            # YOLO: [class, x_center, y_center, width, height] in normalized coordinates

            yolo_bbox = [float(x) for x in tokens[1:]]

            normalized_x_center = yolo_bbox[0]
            normalized_y_center = yolo_bbox[1]
            normalized_width = yolo_bbox[2]
            normalized_height = yolo_bbox[3]

            absolute_x_center = normalized_x_center * im_width
            absolute_y_center = normalized_y_center * im_height
            absolute_width = normalized_width * im_width
            absolute_height = normalized_height * im_height
            absolute_x_min = absolute_x_center - absolute_width / 2
            absolute_y_min = absolute_y_center - absolute_height / 2

            coco_bbox = [absolute_x_min, absolute_y_min, absolute_width, absolute_height]

            ann['bbox'] = coco_bbox
            annotation_number += 1

            annotations_this_image.append(ann)

        # ...for each annotation

    # ...if this image has annotations

    return (im,annotations_this_image)
132
-
133
- # ...def _process_image(...)
134
-
135
-
136
def load_yolo_class_list(class_name_file):
    """
    Loads a dictionary mapping zero-indexed IDs to class names from the text/yaml file
    [class_name_file].

    Args:
        class_name_file (str or list): this can be:
            - a .yml or .yaml file in YOLO's dataset.yaml format
            - a .txt or .data file containing a flat list of class names, one per line;
              blank lines are allowed only at the end of the file
            - a list of class names

    Returns:
        dict: A dict mapping zero-indexed integer IDs to class names

    Raises:
        AssertionError: if the file extension is not recognized, or if a flat class
            list file is empty or starts with a blank line
        ValueError: if a flat class list file has a non-blank line after a blank line
    """

    # class_name_file can also be a list of class names
    if isinstance(class_name_file,list):
        return dict(enumerate(class_name_file))

    ext = os.path.splitext(class_name_file)[1][1:]
    assert ext in ('yml','txt','yaml','data'), 'Unrecognized class name file type {}'.format(
        class_name_file)

    if ext in ('txt','data'):

        with open(class_name_file,'r') as f:
            lines = f.readlines()
        assert len(lines) > 0, 'Empty class name file {}'.format(class_name_file)

        # Blank-line tests have to run on the stripped lines: the raw lines returned
        # by readlines() still carry their newline, so they never have zero length.
        class_names = [s.strip() for s in lines]
        assert len(class_names[0]) > 0, \
            'Empty class name file {} (empty first line)'.format(class_name_file)

        # Blank lines should only appear at the end; trailing blank lines are not
        # class names and are left out of the mapping.
        category_id_to_name = {}
        b_found_blank = False
        for category_name in class_names:
            if len(category_name) == 0:
                b_found_blank = True
            elif b_found_blank:
                raise ValueError('Invalid class name file {}, non-blank line after the last blank line'.format(
                    class_name_file))
            else:
                # No blank line has been seen yet, so this is the line's index too
                category_id_to_name[len(category_id_to_name)] = category_name

    else:

        assert ext in ('yml','yaml')
        category_id_to_name = read_classes_from_yolo_dataset_file(class_name_file)

    return category_id_to_name
190
-
191
- # ...load_yolo_class_list(...)
192
-
193
-
194
def validate_label_file(label_file,category_id_to_name=None,verbose=False):
    """
    Checks whether [label_file] is a valid YOLO label file.  The file extension is
    not checked.  Blank lines and lines starting with '#' are ignored.

    Args:
        label_file (str): the .txt file to validate
        category_id_to_name (dict, optional): a dict mapping integer category IDs to names;
            if this is not None, any category ID that's not in this dict is reported
            as an error
        verbose (bool, optional): enable additional debug console output

    Returns:
        dict: a dict with keys 'file' (the same as [label_file]) and 'errors' (a list of
        error strings, empty if no problems were found)
    """

    errors = []
    label_result = {'file':label_file,'errors':errors}

    try:
        with open(label_file,'r') as f:
            lines = f.readlines()
    except Exception as e:
        errors.append('Read error: {}'.format(str(e)))
        return label_result

    for i_line,line in enumerate(lines):

        s = line.strip()

        # Nothing to validate on blank lines or comment lines
        if (not s) or s.startswith('#'):
            continue

        try:

            tokens = s.split()
            assert len(tokens) == 5, '{} tokens'.format(len(tokens))

            # The category ID is only parsed when we have a category list to check against
            if category_id_to_name is not None:
                category_id = int(tokens[0])
                assert category_id in category_id_to_name, \
                    'Unrecognized category ID {}'.format(category_id)

            yolo_bbox = [float(x) for x in tokens[1:]]

        except Exception as e:
            errors.append('Token error at line {}: {}'.format(i_line,str(e)))
            continue

        # YOLO boxes are [x_center, y_center, width, height]; convert to min/max corners
        half_width = yolo_bbox[2] / 2.0
        half_height = yolo_bbox[3] / 2.0

        normalized_x_min = yolo_bbox[0] - half_width
        normalized_x_max = yolo_bbox[0] + half_width
        normalized_y_min = yolo_bbox[1] - half_height
        normalized_y_max = yolo_bbox[1] + half_height

        box_out_of_range = any((normalized_x_min < 0,
                                normalized_y_min < 0,
                                normalized_x_max > 1,
                                normalized_y_max > 1))

        if box_out_of_range:
            errors.append('Invalid bounding box: {} {} {} {}'.format(
                normalized_x_min,normalized_y_min,normalized_x_max,normalized_y_max))

    # ...for each line

    if verbose and len(errors) > 0:
        print('Errors for {}:'.format(label_file))
        for error in errors:
            print(error)

    return label_result
267
-
268
- # ...def validate_label_file(...)
269
-
270
-
271
def validate_yolo_dataset(input_folder, class_name_file, n_workers=1, pool_type='thread', verbose=False):
    """
    Verifies all the labels in a YOLO dataset folder.

    Looks for:

    * Image files without label files
    * Text files without image files
    * Illegal classes in label files
    * Invalid boxes in label files

    Args:
        input_folder (str): the YOLO dataset folder to validate
        class_name_file (str or list): a list of classes, a flat text file, or a yolo
            dataset.yml/.yaml file.  If it's a dataset.yml file, that file should point to
            input_folder as the base folder, though this is not explicitly checked.
        n_workers (int, optional): number of concurrent workers, set to <= 1 to disable
            parallelization
        pool_type (str, optional): 'thread' or 'process', worker type to use for parallelization;
            not used if [n_workers] <= 1
        verbose (bool, optional): enable additional debug console output

    Returns:
        dict: validation results, as a dict with fields:

        - image_files_without_label_files (list)
        - label_files_without_images (list)
        - label_results (list of dicts with fields 'file', 'errors')
    """

    # Validate arguments
    assert os.path.isdir(input_folder), 'Could not find input folder {}'.format(input_folder)
    if n_workers > 1:
        assert pool_type in ('thread','process'), 'Illegal pool type {}'.format(pool_type)

    category_id_to_name = load_yolo_class_list(class_name_file)

    print('Enumerating files in {}'.format(input_folder))

    all_files = recursive_file_list(input_folder,recursive=True,return_relative_paths=False,
                                    convert_slashes=True)
    label_files = [fn for fn in all_files if fn.endswith('.txt')]
    image_files = find_image_strings(all_files)
    print('Found {} images files and {} label files in {}'.format(
        len(image_files),len(label_files),input_folder))

    # Sets give constant-time membership tests in the loops below
    label_files_set = set(label_files)

    image_files_without_extension = set()
    for fn in image_files:
        image_file_without_extension = os.path.splitext(fn)[0]
        assert image_file_without_extension not in image_files_without_extension, \
            'Duplicate image file, likely with different extensions: {}'.format(fn)
        image_files_without_extension.add(image_file_without_extension)

    print('Looking for missing image/label files')

    image_files_without_label_files = []
    label_files_without_images = []

    for image_file in tqdm(image_files):
        expected_label_file = os.path.splitext(image_file)[0] + '.txt'
        if expected_label_file not in label_files_set:
            image_files_without_label_files.append(image_file)

    for label_file in tqdm(label_files):
        expected_image_file_without_extension = os.path.splitext(label_file)[0]
        if expected_image_file_without_extension not in image_files_without_extension:
            label_files_without_images.append(label_file)

    print('Found {} image files without labels, {} labels without images'.format(
        len(image_files_without_label_files),len(label_files_without_images)))

    print('Validating label files')

    if n_workers <= 1:

        label_results = []
        for fn_abs in tqdm(label_files):
            label_results.append(validate_label_file(fn_abs,
                                                     category_id_to_name=category_id_to_name,
                                                     verbose=verbose))

    else:

        # pool_type was already validated above for the n_workers > 1 case
        if pool_type == 'thread':
            pool = ThreadPool(n_workers)
        else:
            pool = Pool(n_workers)

        print('Starting a {} pool of {} workers'.format(pool_type,n_workers))

        try:
            p = partial(validate_label_file,
                        category_id_to_name=category_id_to_name,
                        verbose=verbose)
            label_results = list(tqdm(pool.imap(p, label_files),
                                      total=len(label_files)))
        finally:
            # Always release the workers, even if validation raised
            pool.close()
            pool.join()

    assert len(label_results) == len(label_files)

    validation_results = {}
    validation_results['image_files_without_label_files'] = image_files_without_label_files
    validation_results['label_files_without_images'] = label_files_without_images
    validation_results['label_results'] = label_results

    return validation_results
379
-
380
- # ...validate_yolo_dataset(...)
381
-
382
-
383
- #%% Main conversion function
384
-
385
- def yolo_to_coco(input_folder,
386
- class_name_file,
387
- output_file=None,
388
- empty_image_handling='no_annotations',
389
- empty_image_category_name='empty',
390
- error_image_handling='no_annotations',
391
- allow_images_without_label_files=True,
392
- n_workers=1,
393
- pool_type='thread',
394
- recursive=True,
395
- exclude_string=None,
396
- include_string=None):
397
- """
398
- Converts a YOLO-formatted dataset to a COCO-formatted dataset.
399
-
400
- All images will be assigned an "error" value, usually None.
401
-
402
- Args:
403
- input_folder (str): the YOLO dataset folder to validate
404
- class_name_file (str or list): a list of classes, a flat text file, or a yolo
405
- dataset.yml/.yaml file. If it's a dataset.yml file, that file should point to
406
- input_folder as the base folder, though this is not explicitly checked.
407
- output_file (str, optional): .json file to which we should write COCO .json data
408
- empty_image_handling (str, optional): how to handle images with no boxes; whether
409
- this includes images with no .txt files depending on the value of
410
- [allow_images_without_label_files]. Can be:
411
-
412
- - 'no_annotations': include the image in the image list, with no annotations
413
- - 'empty_annotations': include the image in the image list, and add an annotation without
414
- any bounding boxes, using a category called [empty_image_category_name].
415
- - 'skip': don't include the image in the image list
416
- - 'error': there shouldn't be any empty images
417
- error_image_handling (str, optional): how to handle images that don't load properly; can
418
- be:
419
-
420
- - 'skip': don't include the image at all
421
- - 'no_annotations': include with no annotations
422
-
423
- n_workers (int, optional): number of concurrent workers, set to <= 1 to disable
424
- parallelization
425
- pool_type (str, optional): 'thread' or 'process', worker type to use for parallelization;
426
- not used if [n_workers] <= 1
427
- recursive (bool, optional): whether to recurse into [input_folder]
428
- exclude_string (str, optional): exclude any images whose filename contains a string
429
- include_string (str, optional): include only images whose filename contains a string
430
-
431
- Returns:
432
- dict: COCO-formatted data, the same as what's written to [output_file]
433
- """
434
-
435
- ## Validate input
436
-
437
- assert os.path.isdir(input_folder)
438
- assert os.path.isfile(class_name_file)
439
-
440
- assert empty_image_handling in \
441
- ('no_annotations','empty_annotations','skip','error'), \
442
- 'Unrecognized empty image handling spec: {}'.format(empty_image_handling)
443
-
444
-
445
- ## Read class names
446
-
447
- category_id_to_name = load_yolo_class_list(class_name_file)
448
-
449
-
450
- # Find or create the empty image category, if necessary
451
- empty_category_id = None
452
-
453
- if (empty_image_handling == 'empty_annotations'):
454
- category_name_to_id = invert_dictionary(category_id_to_name)
455
- if empty_image_category_name in category_name_to_id:
456
- empty_category_id = category_name_to_id[empty_image_category_name]
457
- print('Using existing empty image category with name {}, ID {}'.format(
458
- empty_image_category_name,empty_category_id))
459
- else:
460
- empty_category_id = len(category_id_to_name)
461
- print('Adding an empty category with name {}, ID {}'.format(
462
- empty_image_category_name,empty_category_id))
463
- category_id_to_name[empty_category_id] = empty_image_category_name
464
-
465
-
466
- ## Enumerate images
467
-
468
- print('Enumerating images...')
469
-
470
- image_files_abs = find_images(input_folder,recursive=recursive,convert_slashes=True)
471
-
472
- n_files_original = len(image_files_abs)
473
-
474
- # Optionally include/exclude images matching specific strings
475
- if exclude_string is not None:
476
- image_files_abs = [fn for fn in image_files_abs if exclude_string not in fn]
477
- if include_string is not None:
478
- image_files_abs = [fn for fn in image_files_abs if include_string in fn]
479
-
480
- if len(image_files_abs) != n_files_original or exclude_string is not None or include_string is not None:
481
- n_excluded = n_files_original - len(image_files_abs)
482
- print('Excluded {} of {} images based on filenames'.format(n_excluded,n_files_original))
483
-
484
- categories = []
485
-
486
- for category_id in category_id_to_name:
487
- categories.append({'id':category_id,'name':category_id_to_name[category_id]})
488
-
489
- info = {}
490
- info['version'] = '1.0'
491
- info['description'] = 'Converted from YOLO format'
492
-
493
- image_ids = set()
494
-
495
-
496
- ## If we're expected to have labels for every image, check before we process all the images
497
-
498
- if not allow_images_without_label_files:
499
- print('Verifying that label files exist')
500
- for image_file_abs in tqdm(image_files_abs):
501
- label_file_abs = os.path.splitext(image_file_abs)[0] + '.txt'
502
- assert os.path.isfile(label_file_abs), \
503
- 'No annotation file for {}'.format(image_file_abs)
504
-
505
-
506
- ## Initial loop to make sure image IDs will be unique
507
-
508
- print('Validating image IDs...')
509
-
510
- for fn_abs in tqdm(image_files_abs):
511
-
512
- fn_relative = os.path.relpath(fn_abs,input_folder)
513
- image_id = _filename_to_image_id(fn_relative)
514
- assert image_id not in image_ids, \
515
- 'Oops, you have hit a very esoteric case where you have the same filename ' + \
516
- 'with both spaces and underscores, this is not currently handled.'
517
- image_ids.add(image_id)
518
-
519
-
520
- ## Main loop to process labels
521
-
522
- print('Processing labels...')
523
-
524
- if n_workers <= 1:
525
-
526
- image_results = []
527
- for fn_abs in tqdm(image_files_abs):
528
- image_results.append(_process_image(fn_abs,input_folder,category_id_to_name))
529
-
530
- else:
531
-
532
- assert pool_type in ('process','thread'), 'Illegal pool type {}'.format(pool_type)
533
-
534
- if pool_type == 'thread':
535
- pool = ThreadPool(n_workers)
536
- else:
537
- pool = Pool(n_workers)
538
-
539
- print('Starting a {} pool of {} workers'.format(pool_type,n_workers))
540
-
541
- p = partial(_process_image,input_folder=input_folder,
542
- category_id_to_name=category_id_to_name)
543
- image_results = list(tqdm(pool.imap(p, image_files_abs),
544
- total=len(image_files_abs)))
545
-
546
-
547
- assert len(image_results) == len(image_files_abs)
548
-
549
-
550
- ## Re-assembly of results into a COCO dict
551
-
552
- print('Assembling labels...')
553
-
554
- images = []
555
- annotations = []
556
-
557
- for image_result in tqdm(image_results):
558
-
559
- im = image_result[0]
560
- annotations_this_image = image_result[1]
561
-
562
- # If we have annotations for this image
563
- if len(annotations_this_image) > 0:
564
- assert im['error'] is None
565
- images.append(im)
566
- for ann in annotations_this_image:
567
- annotations.append(ann)
568
-
569
- # If this image failed to read
570
- elif im['error'] is not None:
571
-
572
- if error_image_handling == 'skip':
573
- pass
574
- elif error_image_handling == 'no_annotations':
575
- images.append(im)
576
-
577
- # If this image read successfully, but there are no annotations
578
- else:
579
-
580
- if empty_image_handling == 'skip':
581
- pass
582
- elif empty_image_handling == 'no_annotations':
583
- images.append(im)
584
- elif empty_image_handling == 'empty_annotations':
585
- assert empty_category_id is not None
586
- ann = {}
587
- ann['id'] = im['id'] + '_0'
588
- ann['image_id'] = im['id']
589
- ann['category_id'] = empty_category_id
590
- ann['sequence_level_annotation'] = False
591
- # This would also be a reasonable thing to do, but it's not the convention
592
- # we're adopting.
593
- # ann['bbox'] = [0,0,0,0]
594
- annotations.append(ann)
595
- images.append(im)
596
-
597
- # ...for each image result
598
-
599
- print('Read {} annotations for {} images'.format(len(annotations),
600
- len(images)))
601
-
602
- d = {}
603
- d['images'] = images
604
- d['annotations'] = annotations
605
- d['categories'] = categories
606
- d['info'] = info
607
-
608
- if output_file is not None:
609
- print('Writing to {}'.format(output_file))
610
- with open(output_file,'w') as f:
611
- json.dump(d,f,indent=1)
612
-
613
- return d
614
-
615
- # ...def yolo_to_coco()
616
-
617
-
618
- #%% Interactive driver
619
-
620
- if False:  # guard is always false; NOTE(review): the cells below were presumably indented under it in the original file (indentation is not preserved in this view)
621
-
622
- pass  # placeholder statement directly under the guard
623
-
624
- #%% Convert YOLO folders to COCO
625
-
626
- preview_folder = '/home/user/data/noaa-fish/val-coco-conversion-preview'  # only used by the preview cell further down
627
- input_folder = '/home/user/data/noaa-fish/val'
628
- output_file = '/home/user/data/noaa-fish/val.json'
629
- class_name_file = '/home/user/data/noaa-fish/AllImagesWithAnnotations/classes.txt'
630
-
631
- d = yolo_to_coco(input_folder,class_name_file,output_file)  # converts the 'val' folder; writes output_file and returns the same dict
632
-
633
- input_folder = '/home/user/data/noaa-fish/train'  # rebinds input_folder: when run in order, the later cells see the 'train' values
634
- output_file = '/home/user/data/noaa-fish/train.json'  # rebinds output_file as well
635
- class_name_file = '/home/user/data/noaa-fish/AllImagesWithAnnotations/classes.txt'  # same class list as for 'val'
636
-
637
- d = yolo_to_coco(input_folder,class_name_file,output_file)  # overwrites d with the 'train' result
638
-
639
-
640
- #%% Check DB integrity
641
-
642
- from data_management.databases import integrity_check_json_db
643
-
644
- options = integrity_check_json_db.IntegrityCheckOptions()
645
- options.baseDir = input_folder  # presumably the folder that image paths in the .json are relative to - TODO confirm
646
- options.bCheckImageSizes = False
647
- options.bCheckImageExistence = True
648
- options.bFindUnusedImages = True
649
-
650
- _, _, _ = integrity_check_json_db.integrity_check_json_db(output_file, options)  # all three return values are discarded
651
-
652
-
653
- #%% Preview some images
654
-
655
- from md_visualization import visualize_db
656
-
657
- viz_options = visualize_db.DbVizOptions()
658
- viz_options.num_to_visualize = None  # presumably None means "no limit" - TODO confirm against visualize_db
659
- viz_options.trim_to_images_with_bboxes = False
660
- viz_options.add_search_links = False
661
- viz_options.sort_by_filename = False
662
- viz_options.parallelize_rendering = True
663
- viz_options.include_filename_links = True
664
-
665
- html_output_file, _ = visualize_db.visualize_db(db_path=output_file,  # second return value is discarded
666
- output_dir=preview_folder,
667
- image_base_dir=input_folder,
668
- options=viz_options)
669
-
670
- from md_utils.path_utils import open_file
671
- open_file(html_output_file)  # presumably opens the generated HTML preview in the default viewer - TODO confirm
672
-
673
-
674
- #%% Command-line driver
675
-
676
- # TODO: command-line driver not implemented yet
detection/__init__.py DELETED
File without changes
File without changes