megadetector 5.0.11__py3-none-any.whl → 5.0.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (203) hide show
  1. megadetector/api/__init__.py +0 -0
  2. megadetector/api/batch_processing/__init__.py +0 -0
  3. megadetector/api/batch_processing/api_core/__init__.py +0 -0
  4. megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. megadetector/api/batch_processing/api_core/batch_service/score.py +439 -0
  6. megadetector/api/batch_processing/api_core/server.py +294 -0
  7. megadetector/api/batch_processing/api_core/server_api_config.py +97 -0
  8. megadetector/api/batch_processing/api_core/server_app_config.py +55 -0
  9. megadetector/api/batch_processing/api_core/server_batch_job_manager.py +220 -0
  10. megadetector/api/batch_processing/api_core/server_job_status_table.py +149 -0
  11. megadetector/api/batch_processing/api_core/server_orchestration.py +360 -0
  12. megadetector/api/batch_processing/api_core/server_utils.py +88 -0
  13. megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
  14. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +46 -0
  15. megadetector/api/batch_processing/api_support/__init__.py +0 -0
  16. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +152 -0
  17. megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
  18. megadetector/api/batch_processing/integration/digiKam/setup.py +6 -0
  19. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +465 -0
  20. megadetector/api/batch_processing/integration/eMammal/test_scripts/config_template.py +5 -0
  21. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +125 -0
  22. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +55 -0
  23. megadetector/api/synchronous/__init__.py +0 -0
  24. megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  25. megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +152 -0
  26. megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +263 -0
  27. megadetector/api/synchronous/api_core/animal_detection_api/config.py +35 -0
  28. megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
  29. megadetector/api/synchronous/api_core/tests/load_test.py +110 -0
  30. megadetector/classification/__init__.py +0 -0
  31. megadetector/classification/aggregate_classifier_probs.py +108 -0
  32. megadetector/classification/analyze_failed_images.py +227 -0
  33. megadetector/classification/cache_batchapi_outputs.py +198 -0
  34. megadetector/classification/create_classification_dataset.py +627 -0
  35. megadetector/classification/crop_detections.py +516 -0
  36. megadetector/classification/csv_to_json.py +226 -0
  37. megadetector/classification/detect_and_crop.py +855 -0
  38. megadetector/classification/efficientnet/__init__.py +9 -0
  39. megadetector/classification/efficientnet/model.py +415 -0
  40. megadetector/classification/efficientnet/utils.py +607 -0
  41. megadetector/classification/evaluate_model.py +520 -0
  42. megadetector/classification/identify_mislabeled_candidates.py +152 -0
  43. megadetector/classification/json_to_azcopy_list.py +63 -0
  44. megadetector/classification/json_validator.py +699 -0
  45. megadetector/classification/map_classification_categories.py +276 -0
  46. megadetector/classification/merge_classification_detection_output.py +506 -0
  47. megadetector/classification/prepare_classification_script.py +194 -0
  48. megadetector/classification/prepare_classification_script_mc.py +228 -0
  49. megadetector/classification/run_classifier.py +287 -0
  50. megadetector/classification/save_mislabeled.py +110 -0
  51. megadetector/classification/train_classifier.py +827 -0
  52. megadetector/classification/train_classifier_tf.py +725 -0
  53. megadetector/classification/train_utils.py +323 -0
  54. megadetector/data_management/__init__.py +0 -0
  55. megadetector/data_management/annotations/__init__.py +0 -0
  56. megadetector/data_management/annotations/annotation_constants.py +34 -0
  57. megadetector/data_management/camtrap_dp_to_coco.py +237 -0
  58. megadetector/data_management/cct_json_utils.py +404 -0
  59. megadetector/data_management/cct_to_md.py +176 -0
  60. megadetector/data_management/cct_to_wi.py +289 -0
  61. megadetector/data_management/coco_to_labelme.py +283 -0
  62. megadetector/data_management/coco_to_yolo.py +662 -0
  63. megadetector/data_management/databases/__init__.py +0 -0
  64. megadetector/data_management/databases/add_width_and_height_to_db.py +33 -0
  65. megadetector/data_management/databases/combine_coco_camera_traps_files.py +206 -0
  66. megadetector/data_management/databases/integrity_check_json_db.py +493 -0
  67. megadetector/data_management/databases/subset_json_db.py +115 -0
  68. megadetector/data_management/generate_crops_from_cct.py +149 -0
  69. megadetector/data_management/get_image_sizes.py +189 -0
  70. megadetector/data_management/importers/add_nacti_sizes.py +52 -0
  71. megadetector/data_management/importers/add_timestamps_to_icct.py +79 -0
  72. megadetector/data_management/importers/animl_results_to_md_results.py +158 -0
  73. megadetector/data_management/importers/auckland_doc_test_to_json.py +373 -0
  74. megadetector/data_management/importers/auckland_doc_to_json.py +201 -0
  75. megadetector/data_management/importers/awc_to_json.py +191 -0
  76. megadetector/data_management/importers/bellevue_to_json.py +273 -0
  77. megadetector/data_management/importers/cacophony-thermal-importer.py +793 -0
  78. megadetector/data_management/importers/carrizo_shrubfree_2018.py +269 -0
  79. megadetector/data_management/importers/carrizo_trail_cam_2017.py +289 -0
  80. megadetector/data_management/importers/cct_field_adjustments.py +58 -0
  81. megadetector/data_management/importers/channel_islands_to_cct.py +913 -0
  82. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +180 -0
  83. megadetector/data_management/importers/eMammal/eMammal_helpers.py +249 -0
  84. megadetector/data_management/importers/eMammal/make_eMammal_json.py +223 -0
  85. megadetector/data_management/importers/ena24_to_json.py +276 -0
  86. megadetector/data_management/importers/filenames_to_json.py +386 -0
  87. megadetector/data_management/importers/helena_to_cct.py +283 -0
  88. megadetector/data_management/importers/idaho-camera-traps.py +1407 -0
  89. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +294 -0
  90. megadetector/data_management/importers/jb_csv_to_json.py +150 -0
  91. megadetector/data_management/importers/mcgill_to_json.py +250 -0
  92. megadetector/data_management/importers/missouri_to_json.py +490 -0
  93. megadetector/data_management/importers/nacti_fieldname_adjustments.py +79 -0
  94. megadetector/data_management/importers/noaa_seals_2019.py +181 -0
  95. megadetector/data_management/importers/pc_to_json.py +365 -0
  96. megadetector/data_management/importers/plot_wni_giraffes.py +123 -0
  97. megadetector/data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -0
  98. megadetector/data_management/importers/prepare_zsl_imerit.py +131 -0
  99. megadetector/data_management/importers/rspb_to_json.py +356 -0
  100. megadetector/data_management/importers/save_the_elephants_survey_A.py +320 -0
  101. megadetector/data_management/importers/save_the_elephants_survey_B.py +329 -0
  102. megadetector/data_management/importers/snapshot_safari_importer.py +758 -0
  103. megadetector/data_management/importers/snapshot_safari_importer_reprise.py +665 -0
  104. megadetector/data_management/importers/snapshot_serengeti_lila.py +1067 -0
  105. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +150 -0
  106. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +153 -0
  107. megadetector/data_management/importers/sulross_get_exif.py +65 -0
  108. megadetector/data_management/importers/timelapse_csv_set_to_json.py +490 -0
  109. megadetector/data_management/importers/ubc_to_json.py +399 -0
  110. megadetector/data_management/importers/umn_to_json.py +507 -0
  111. megadetector/data_management/importers/wellington_to_json.py +263 -0
  112. megadetector/data_management/importers/wi_to_json.py +442 -0
  113. megadetector/data_management/importers/zamba_results_to_md_results.py +181 -0
  114. megadetector/data_management/labelme_to_coco.py +547 -0
  115. megadetector/data_management/labelme_to_yolo.py +272 -0
  116. megadetector/data_management/lila/__init__.py +0 -0
  117. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +97 -0
  118. megadetector/data_management/lila/add_locations_to_nacti.py +147 -0
  119. megadetector/data_management/lila/create_lila_blank_set.py +558 -0
  120. megadetector/data_management/lila/create_lila_test_set.py +152 -0
  121. megadetector/data_management/lila/create_links_to_md_results_files.py +106 -0
  122. megadetector/data_management/lila/download_lila_subset.py +178 -0
  123. megadetector/data_management/lila/generate_lila_per_image_labels.py +516 -0
  124. megadetector/data_management/lila/get_lila_annotation_counts.py +170 -0
  125. megadetector/data_management/lila/get_lila_image_counts.py +112 -0
  126. megadetector/data_management/lila/lila_common.py +300 -0
  127. megadetector/data_management/lila/test_lila_metadata_urls.py +132 -0
  128. megadetector/data_management/ocr_tools.py +870 -0
  129. megadetector/data_management/read_exif.py +809 -0
  130. megadetector/data_management/remap_coco_categories.py +84 -0
  131. megadetector/data_management/remove_exif.py +66 -0
  132. megadetector/data_management/rename_images.py +187 -0
  133. megadetector/data_management/resize_coco_dataset.py +189 -0
  134. megadetector/data_management/wi_download_csv_to_coco.py +247 -0
  135. megadetector/data_management/yolo_output_to_md_output.py +446 -0
  136. megadetector/data_management/yolo_to_coco.py +676 -0
  137. megadetector/detection/__init__.py +0 -0
  138. megadetector/detection/detector_training/__init__.py +0 -0
  139. megadetector/detection/detector_training/model_main_tf2.py +114 -0
  140. megadetector/detection/process_video.py +846 -0
  141. megadetector/detection/pytorch_detector.py +355 -0
  142. megadetector/detection/run_detector.py +779 -0
  143. megadetector/detection/run_detector_batch.py +1219 -0
  144. megadetector/detection/run_inference_with_yolov5_val.py +1087 -0
  145. megadetector/detection/run_tiled_inference.py +934 -0
  146. megadetector/detection/tf_detector.py +192 -0
  147. megadetector/detection/video_utils.py +698 -0
  148. megadetector/postprocessing/__init__.py +0 -0
  149. megadetector/postprocessing/add_max_conf.py +64 -0
  150. megadetector/postprocessing/categorize_detections_by_size.py +165 -0
  151. megadetector/postprocessing/classification_postprocessing.py +716 -0
  152. megadetector/postprocessing/combine_api_outputs.py +249 -0
  153. megadetector/postprocessing/compare_batch_results.py +966 -0
  154. megadetector/postprocessing/convert_output_format.py +396 -0
  155. megadetector/postprocessing/load_api_results.py +195 -0
  156. megadetector/postprocessing/md_to_coco.py +310 -0
  157. megadetector/postprocessing/md_to_labelme.py +330 -0
  158. megadetector/postprocessing/merge_detections.py +412 -0
  159. megadetector/postprocessing/postprocess_batch_results.py +1908 -0
  160. megadetector/postprocessing/remap_detection_categories.py +170 -0
  161. megadetector/postprocessing/render_detection_confusion_matrix.py +660 -0
  162. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +211 -0
  163. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +83 -0
  164. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +1635 -0
  165. megadetector/postprocessing/separate_detections_into_folders.py +730 -0
  166. megadetector/postprocessing/subset_json_detector_output.py +700 -0
  167. megadetector/postprocessing/top_folders_to_bottom.py +223 -0
  168. megadetector/taxonomy_mapping/__init__.py +0 -0
  169. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +491 -0
  170. megadetector/taxonomy_mapping/map_new_lila_datasets.py +150 -0
  171. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -0
  172. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +588 -0
  173. megadetector/taxonomy_mapping/retrieve_sample_image.py +71 -0
  174. megadetector/taxonomy_mapping/simple_image_download.py +219 -0
  175. megadetector/taxonomy_mapping/species_lookup.py +834 -0
  176. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +159 -0
  177. megadetector/taxonomy_mapping/taxonomy_graph.py +346 -0
  178. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +83 -0
  179. megadetector/utils/__init__.py +0 -0
  180. megadetector/utils/azure_utils.py +178 -0
  181. megadetector/utils/ct_utils.py +613 -0
  182. megadetector/utils/directory_listing.py +246 -0
  183. megadetector/utils/md_tests.py +1164 -0
  184. megadetector/utils/path_utils.py +1045 -0
  185. megadetector/utils/process_utils.py +160 -0
  186. megadetector/utils/sas_blob_utils.py +509 -0
  187. megadetector/utils/split_locations_into_train_val.py +228 -0
  188. megadetector/utils/string_utils.py +92 -0
  189. megadetector/utils/url_utils.py +323 -0
  190. megadetector/utils/write_html_image_list.py +225 -0
  191. megadetector/visualization/__init__.py +0 -0
  192. megadetector/visualization/plot_utils.py +293 -0
  193. megadetector/visualization/render_images_with_thumbnails.py +275 -0
  194. megadetector/visualization/visualization_utils.py +1536 -0
  195. megadetector/visualization/visualize_db.py +552 -0
  196. megadetector/visualization/visualize_detector_output.py +405 -0
  197. {megadetector-5.0.11.dist-info → megadetector-5.0.13.dist-info}/LICENSE +0 -0
  198. {megadetector-5.0.11.dist-info → megadetector-5.0.13.dist-info}/METADATA +2 -2
  199. megadetector-5.0.13.dist-info/RECORD +201 -0
  200. megadetector-5.0.13.dist-info/top_level.txt +1 -0
  201. megadetector-5.0.11.dist-info/RECORD +0 -5
  202. megadetector-5.0.11.dist-info/top_level.txt +0 -1
  203. {megadetector-5.0.11.dist-info → megadetector-5.0.13.dist-info}/WHEEL +0 -0
@@ -0,0 +1,662 @@
1
+ """
2
+
3
+ coco_to_yolo.py
4
+
5
+ Converts a COCO-formatted dataset to a YOLO-formatted dataset, flattening
6
+ the dataset (to a single folder) in the process.
7
+
8
+ If the input and output folders are the same, writes .txt files to the input folder,
9
+ and neither moves nor modifies images.
10
+
11
+ Currently ignores segmentation masks, and errors if an annotation has a
12
+ segmentation polygon but no bbox.
13
+
14
+ Has only been tested on a handful of COCO Camera Traps data sets; if you
15
+ use it for more general COCO conversion, YMMV.
16
+
17
+ """
18
+
19
+ #%% Imports and constants
20
+
21
+ import json
22
+ import os
23
+ import shutil
24
+
25
+ from collections import defaultdict
26
+ from tqdm import tqdm
27
+
28
+ from megadetector.utils.path_utils import safe_create_link,find_images
29
+
30
+
31
+ #%% Support functions
32
+
33
def write_yolo_dataset_file(yolo_dataset_file,
                            dataset_base_dir,
                            class_list,
                            train_folder_relative=None,
                            val_folder_relative=None,
                            test_folder_relative=None):
    """
    Write a YOLOv5 dataset.yaml file to the absolute path [yolo_dataset_file] (should
    have a .yaml extension, though it's only a warning if it doesn't).

    Args:
        yolo_dataset_file (str): the file, typically ending in .yaml or .yml, to write.
            Does not have to be within dataset_base_dir.
        dataset_base_dir (str): the absolute base path of the YOLO dataset; written
            verbatim as the "path" entry
        class_list (list or str): an ordered list of class names (the first item will be class 0,
            etc.), or the name of a text file containing an ordered list of class names (one per
            line, starting from class zero).  Blank lines in that file are ignored.
        train_folder_relative (str, optional): written verbatim as the "train" entry;
            the entry is omitted if this is None
        val_folder_relative (str, optional): written verbatim as the "val" entry;
            the entry is omitted if this is None
        test_folder_relative (str, optional): written verbatim as the "test" entry;
            the entry is omitted if this is None
    """

    # Read class names
    if isinstance(class_list,str):
        with open(class_list,'r') as f:
            class_lines = f.readlines()
        class_lines = [s.strip() for s in class_lines]
        class_list = [s for s in class_lines if len(s) > 0]

    # A non-YAML extension is only worth a warning; we still write the file
    if not yolo_dataset_file.endswith(('.yml','.yaml')):
        print('Warning: writing dataset file to a non-yml/yaml extension:\n{}'.format(
            yolo_dataset_file))

    # Write dataset.yaml
    with open(yolo_dataset_file,'w') as f:

        f.write('# Train/val sets\n')
        f.write('path: {}\n'.format(dataset_base_dir))
        if train_folder_relative is not None:
            f.write('train: {}\n'.format(train_folder_relative))
        if val_folder_relative is not None:
            f.write('val: {}\n'.format(val_folder_relative))
        if test_folder_relative is not None:
            # This key must be "test"; writing a second "val" key here would
            # duplicate/override the validation folder and never declare a test split.
            f.write('test: {}\n'.format(test_folder_relative))

        f.write('\n')

        f.write('# Classes\n')
        f.write('names:\n')
        # Class IDs are the zero-based positions in class_list
        for i_class,class_name in enumerate(class_list):
            f.write(' {}: {}\n'.format(i_class,class_name))

# ...def write_yolo_dataset_file(...)
83
+
84
+
85
+ def coco_to_yolo(input_image_folder,
86
+ output_folder,
87
+ input_file,
88
+ source_format='coco',
89
+ overwrite_images=False,
90
+ create_image_and_label_folders=False,
91
+ class_file_name='classes.txt',
92
+ allow_empty_annotations=False,
93
+ clip_boxes=False,
94
+ image_id_to_output_image_json_file=None,
95
+ images_to_exclude=None,
96
+ path_replacement_char='#',
97
+ category_names_to_exclude=None,
98
+ category_names_to_include=None,
99
+ write_output=True,
100
+ flatten_paths=True):
101
+ """
102
+ Converts a COCO-formatted dataset to a YOLO-formatted dataset, optionally flattening the
103
+ dataset to a single folder in the process.
104
+
105
+ If the input and output folders are the same, writes .txt files to the input folder,
106
+ and neither moves nor modifies images.
107
+
108
+ Currently ignores segmentation masks, and errors if an annotation has a
109
+ segmentation polygon but no bbox.
110
+
111
+ Args:
112
+ input_image_folder (str): the folder where images live; filenames in the COCO .json
113
+ file [input_file] should be relative to this folder
114
+ output_folder (str): the base folder for the YOLO dataset; can be the same as
115
+ [input_image_folder], in which case images are left alone.
116
+ input_file (str): a .json file in COCO format
117
+ source_format (str, optional): can be 'coco' (default) or 'coco_camera_traps'. The only difference
118
+ is that when source_format is 'coco_camera_traps', we treat an image with a non-bbox
119
+ annotation with a category id of 0 as a special case, i.e. that's how an empty image
120
+ is indicated. The original COCO standard is a little ambiguous on this issue. If
121
+ source_format is 'coco', we either treat images as empty or error, depending on the value
122
+ of [allow_empty_annotations]. [allow_empty_annotations] has no effect if source_format is
123
+ 'coco_camera_traps'.
124
+ create_image_and_label_folders (bool, optional): whether to create separate folders called 'images' and
125
+ 'labels' in the YOLO output folder. If create_image_and_label_folders is False,
126
+ a/b/c/image001.jpg will become a#b#c#image001.jpg, and the corresponding text file will
127
+ be a#b#c#image001.txt. If create_image_and_label_folders is True, a/b/c/image001.jpg will become
128
+ images/a#b#c#image001.jpg, and the corresponding text file will be
129
+ labels/a#b#c#image001.txt.
130
+ clip_boxes (bool, optional): whether to clip bounding box coordinates to the range [0,1] before
131
+ converting to YOLO xywh format
132
+ image_id_to_output_image_json_file (str, optional): an optional *output* file, to which we will write
133
+ a mapping from image IDs to output file names
134
+ images_to_exclude (list, optional): a list of image files (relative paths in the input folder) that we
135
+ should ignore
136
+ path_replacement_char (str, optional): only relevant if [flatten_paths] is True; this is used to replace
137
+ path separators, e.g. if [path_replacement_char] is '#' and [flatten_paths] is True, a/b/c/d.jpg
138
+ becomes a#b#c#d.jpg
139
+ category_names_to_exclude (str, optional): category names that should not be represented in the
140
+ YOLO output; only impacts annotations, does not prevent copying images. There's almost no reason
141
+ you would want to specify this and [category_names_to_include].
142
+ category_names_to_include (str, optional): allow-list of category names that should be represented in the
143
+ YOLO output; only impacts annotations, does not prevent copying images. There's almost no reason
144
+ you would want to specify this and [category_names_to_exclude].
145
+ write_output (bool, optional): determines whether we actually copy images and write annotations;
146
+ setting this to False mostly puts this function in "dry run" mode. The class list
147
+ file is written regardless of the value of write_output.
148
+
149
+ Returns:
150
+ dict: information about the coco --> yolo mapping, containing at least the fields:
151
+
152
+ - class_list_filename: the filename to which we wrote the flat list of class names required
153
+ by the YOLO format.
154
+ - source_image_to_dest_image: a dict mapping source images to destination images
155
+ - coco_id_to_yolo_id: a dict mapping COCO category IDs to YOLO category IDs
156
+ """
157
+
158
+ ## Validate input
159
+
160
+ if category_names_to_include is not None and category_names_to_exclude is not None:
161
+ raise ValueError('category_names_to_include and category_names_to_exclude are mutually exclusive')
162
+
163
+ if output_folder is None:
164
+ output_folder = input_image_folder
165
+
166
+ if images_to_exclude is not None:
167
+ images_to_exclude = set(images_to_exclude)
168
+
169
+ if category_names_to_exclude is None:
170
+ category_names_to_exclude = {}
171
+
172
+ assert os.path.isdir(input_image_folder)
173
+ assert os.path.isfile(input_file)
174
+ os.makedirs(output_folder,exist_ok=True)
175
+
176
+ if (output_folder == input_image_folder) and (overwrite_images) and \
177
+ (not create_image_and_label_folders) and (not flatten_paths):
178
+ print('Warning: output folder and input folder are the same, disabling overwrite_images')
179
+ overwrite_images = False
180
+
181
+ ## Read input data
182
+
183
+ with open(input_file,'r') as f:
184
+ data = json.load(f)
185
+
186
+
187
+ ## Parse annotations
188
+
189
+ image_id_to_annotations = defaultdict(list)
190
+
191
+ # i_ann = 0; ann = data['annotations'][0]
192
+ for i_ann,ann in enumerate(data['annotations']):
193
+
194
+ # Make sure no annotations have *only* segmentation data
195
+ if ( \
196
+ ('segmentation' in ann.keys()) and \
197
+ (ann['segmentation'] is not None) and \
198
+ (len(ann['segmentation']) > 0) ) \
199
+ and \
200
+ (('bbox' not in ann.keys()) or (ann['bbox'] is None) or (len(ann['bbox'])==0)):
201
+ raise ValueError('Oops: segmentation data present without bbox information, ' + \
202
+ 'this script isn\'t ready for this dataset')
203
+
204
+ image_id_to_annotations[ann['image_id']].append(ann)
205
+
206
+ print('Parsed annotations for {} images'.format(len(image_id_to_annotations)))
207
+
208
+ # Re-map class IDs to make sure they run from 0...n-classes-1
209
+ #
210
+ # Note: this allows unused categories in the output data set. This is OK for
211
+ # some training pipelines, not for others.
212
+ next_category_id = 0
213
+ coco_id_to_yolo_id = {}
214
+ coco_id_to_name = {}
215
+ yolo_id_to_name = {}
216
+ coco_category_ids_to_exclude = set()
217
+
218
+ for category in data['categories']:
219
+ coco_id_to_name[category['id']] = category['name']
220
+ if (category_names_to_include is not None) and \
221
+ (category['name'] not in category_names_to_include):
222
+ coco_category_ids_to_exclude.add(category['id'])
223
+ continue
224
+ elif (category['name'] in category_names_to_exclude):
225
+ coco_category_ids_to_exclude.add(category['id'])
226
+ continue
227
+ assert category['id'] not in coco_id_to_yolo_id
228
+ coco_id_to_yolo_id[category['id']] = next_category_id
229
+ yolo_id_to_name[next_category_id] = category['name']
230
+ next_category_id += 1
231
+
232
+
233
+ ## Process images (everything but I/O)
234
+
235
+ # List of dictionaries with keys 'source_image','dest_image','bboxes','dest_txt'
236
+ images_to_copy = []
237
+
238
+ missing_images = []
239
+ excluded_images = []
240
+
241
+ image_names = set()
242
+
243
+ typical_image_extensions = set(['.jpg','.jpeg','.png','.gif','.tif','.bmp'])
244
+
245
+ printed_empty_annotation_warning = False
246
+
247
+ image_id_to_output_image_name = {}
248
+
249
+ print('Processing annotations')
250
+
251
+ n_clipped_boxes = 0
252
+ n_total_boxes = 0
253
+
254
+ # i_image = 0; im = data['images'][i_image]
255
+ for i_image,im in tqdm(enumerate(data['images']),total=len(data['images'])):
256
+
257
+ output_info = {}
258
+ source_image = os.path.join(input_image_folder,im['file_name'])
259
+ output_info['source_image'] = source_image
260
+
261
+ if images_to_exclude is not None and im['file_name'] in images_to_exclude:
262
+ excluded_images.append(im['file_name'])
263
+ continue
264
+
265
+ tokens = os.path.splitext(im['file_name'])
266
+ if tokens[1].lower() not in typical_image_extensions:
267
+ print('Warning: unusual image file name {}'.format(im['file_name']))
268
+
269
+ if flatten_paths:
270
+ image_name = tokens[0].replace('\\','/').replace('/',path_replacement_char) + \
271
+ '_' + str(i_image).zfill(6)
272
+ else:
273
+ image_name = tokens[0]
274
+
275
+ assert image_name not in image_names, 'Image name collision for {}'.format(image_name)
276
+ image_names.add(image_name)
277
+
278
+ assert im['id'] not in image_id_to_output_image_name
279
+ image_id_to_output_image_name[im['id']] = image_name
280
+
281
+ dest_image_relative = image_name + tokens[1]
282
+ output_info['dest_image_relative'] = dest_image_relative
283
+ dest_txt_relative = image_name + '.txt'
284
+ output_info['dest_txt_relative'] = dest_txt_relative
285
+ output_info['bboxes'] = []
286
+
287
+ # assert os.path.isfile(source_image), 'Could not find image {}'.format(source_image)
288
+ if not os.path.isfile(source_image):
289
+ print('Warning: could not find image {}'.format(source_image))
290
+ missing_images.append(im['file_name'])
291
+ continue
292
+
293
+ image_id = im['id']
294
+
295
+ image_bboxes = []
296
+
297
+ if image_id in image_id_to_annotations:
298
+
299
+ for ann in image_id_to_annotations[image_id]:
300
+
301
+ # If this annotation has no bounding boxes...
302
+ if 'bbox' not in ann or ann['bbox'] is None or len(ann['bbox']) == 0:
303
+
304
+ if source_format == 'coco':
305
+
306
+ if not allow_empty_annotations:
307
+ # This is not entirely clear from the COCO spec, but it seems to be consensus
308
+ # that if you want to specify an image with no objects, you don't include any
309
+ # annotations for that image.
310
+ raise ValueError('If an annotation exists, it should have content')
311
+ else:
312
+ continue
313
+
314
+ elif source_format == 'coco_camera_traps':
315
+
316
+ # We allow empty bbox lists in COCO camera traps; this is typically a negative
317
+ # example in a dataset that has bounding boxes, and 0 is typically the empty
318
+ # category.
319
+ if ann['category_id'] != 0:
320
+ if not printed_empty_annotation_warning:
321
+ printed_empty_annotation_warning = True
322
+ print('Warning: non-bbox annotation found with category {}'.format(
323
+ ann['category_id']))
324
+ continue
325
+
326
+ else:
327
+
328
+ raise ValueError('Unrecognized COCO variant: {}'.format(source_format))
329
+
330
+ # ...if this is an empty annotation
331
+
332
+ coco_bbox = ann['bbox']
333
+
334
+ # This category isn't in our category list. This typically corresponds to whole sets
335
+ # of images that were excluded from the YOLO set.
336
+ if ann['category_id'] in coco_category_ids_to_exclude:
337
+ continue
338
+
339
+ yolo_category_id = coco_id_to_yolo_id[ann['category_id']]
340
+
341
+ # COCO: [x_min, y_min, width, height] in absolute coordinates
342
+ # YOLO: [class, x_center, y_center, width, height] in normalized coordinates
343
+
344
+ # Convert from COCO coordinates to YOLO coordinates
345
+ img_w = im['width']
346
+ img_h = im['height']
347
+
348
+ if source_format in ('coco','coco_camera_traps'):
349
+
350
+ x_min_absolute = coco_bbox[0]
351
+ y_min_absolute = coco_bbox[1]
352
+ box_w_absolute = coco_bbox[2]
353
+ box_h_absolute = coco_bbox[3]
354
+
355
+ x_center_absolute = (x_min_absolute + (x_min_absolute + box_w_absolute)) / 2
356
+ y_center_absolute = (y_min_absolute + (y_min_absolute + box_h_absolute)) / 2
357
+
358
+ x_center_relative = x_center_absolute / img_w
359
+ y_center_relative = y_center_absolute / img_h
360
+
361
+ box_w_relative = box_w_absolute / img_w
362
+ box_h_relative = box_h_absolute / img_h
363
+
364
+ else:
365
+
366
+ raise ValueError('Unrecognized source format {}'.format(source_format))
367
+
368
+ if clip_boxes:
369
+
370
+ clipped_box = False
371
+
372
+ box_right = x_center_relative + (box_w_relative / 2.0)
373
+ if box_right > 1.0:
374
+ clipped_box = True
375
+ overhang = box_right - 1.0
376
+ box_w_relative -= overhang
377
+ x_center_relative -= (overhang / 2.0)
378
+
379
+ box_bottom = y_center_relative + (box_h_relative / 2.0)
380
+ if box_bottom > 1.0:
381
+ clipped_box = True
382
+ overhang = box_bottom - 1.0
383
+ box_h_relative -= overhang
384
+ y_center_relative -= (overhang / 2.0)
385
+
386
+ box_left = x_center_relative - (box_w_relative / 2.0)
387
+ if box_left < 0.0:
388
+ clipped_box = True
389
+ overhang = abs(box_left)
390
+ box_w_relative -= overhang
391
+ x_center_relative += (overhang / 2.0)
392
+
393
+ box_top = y_center_relative - (box_h_relative / 2.0)
394
+ if box_top < 0.0:
395
+ clipped_box = True
396
+ overhang = abs(box_top)
397
+ box_h_relative -= overhang
398
+ y_center_relative += (overhang / 2.0)
399
+
400
+ if clipped_box:
401
+ n_clipped_boxes += 1
402
+
403
+ yolo_box = [yolo_category_id,
404
+ x_center_relative, y_center_relative,
405
+ box_w_relative, box_h_relative]
406
+
407
+ image_bboxes.append(yolo_box)
408
+ n_total_boxes += 1
409
+
410
+ # ...for each annotation
411
+
412
+ # ...if this image has annotations
413
+
414
+ output_info['bboxes'] = image_bboxes
415
+
416
+ images_to_copy.append(output_info)
417
+
418
+ # ...for each image
419
+
420
+ print('\nWriting {} boxes ({} clipped) for {} images'.format(n_total_boxes,
421
+ n_clipped_boxes,len(images_to_copy)))
422
+ print('{} missing images (of {})'.format(len(missing_images),len(data['images'])))
423
+
424
+ if images_to_exclude is not None:
425
+ print('{} excluded images (of {})'.format(len(excluded_images),len(data['images'])))
426
+
427
+
428
+ ## Write output
429
+
430
+ print('Generating class list')
431
+
432
+ class_list_filename = os.path.join(output_folder,class_file_name)
433
+ with open(class_list_filename, 'w') as f:
434
+ print('Writing class list to {}'.format(class_list_filename))
435
+ for i_class in range(0,len(yolo_id_to_name)):
436
+ # Category IDs should range from 0..N-1
437
+ assert i_class in yolo_id_to_name
438
+ f.write(yolo_id_to_name[i_class] + '\n')
439
+
440
+ if image_id_to_output_image_json_file is not None:
441
+ print('Writing image ID mapping to {}'.format(image_id_to_output_image_json_file))
442
+ with open(image_id_to_output_image_json_file,'w') as f:
443
+ json.dump(image_id_to_output_image_name,f,indent=1)
444
+
445
+
446
+ if (output_folder == input_image_folder) and (not create_image_and_label_folders):
447
+ print('Creating annotation files (not copying images, input and output folder are the same)')
448
+ else:
449
+ print('Copying images and creating annotation files')
450
+
451
+ if create_image_and_label_folders:
452
+ dest_image_folder = os.path.join(output_folder,'images')
453
+ dest_txt_folder = os.path.join(output_folder,'labels')
454
+ else:
455
+ dest_image_folder = output_folder
456
+ dest_txt_folder = output_folder
457
+
458
+ source_image_to_dest_image = {}
459
+
460
+ # TODO: parallelize this loop
461
+ #
462
+ # output_info = images_to_copy[0]
463
+ for output_info in tqdm(images_to_copy):
464
+
465
+ source_image = output_info['source_image']
466
+ dest_image_relative = output_info['dest_image_relative']
467
+ dest_txt_relative = output_info['dest_txt_relative']
468
+
469
+ dest_image = os.path.join(dest_image_folder,dest_image_relative)
470
+ dest_txt = os.path.join(dest_txt_folder,dest_txt_relative)
471
+
472
+ source_image_to_dest_image[source_image] = dest_image
473
+
474
+ if write_output:
475
+
476
+ os.makedirs(os.path.dirname(dest_image),exist_ok=True)
477
+ os.makedirs(os.path.dirname(dest_txt),exist_ok=True)
478
+
479
+ if not create_image_and_label_folders:
480
+ assert os.path.dirname(dest_image) == os.path.dirname(dest_txt)
481
+
482
+ if (not os.path.isfile(dest_image)) or (overwrite_images):
483
+ shutil.copyfile(source_image,dest_image)
484
+
485
+ bboxes = output_info['bboxes']
486
+
487
+ # Only write an annotation file if there are bounding boxes. Images with
488
+ # no .txt files are treated as hard negatives, at least by YOLOv5:
489
+ #
490
+ # https://github.com/ultralytics/yolov5/issues/3218
491
+ #
492
+ # I think this is also true for images with empty .txt files, but
493
+ # I'm using the convention suggested on that issue, i.e. hard
494
+ # negatives are expressed as images without .txt files.
495
+ if len(bboxes) > 0:
496
+
497
+ with open(dest_txt,'w') as f:
498
+
499
+ # bbox = bboxes[0]
500
+ for bbox in bboxes:
501
+ assert len(bbox) == 5
502
+ s = '{} {} {} {} {}'.format(bbox[0],bbox[1],bbox[2],bbox[3],bbox[4])
503
+ f.write(s + '\n')
504
+
505
+ # ...if we're actually writing output
506
+
507
+ # ...for each image
508
+
509
+ coco_to_yolo_info = {}
510
+ coco_to_yolo_info['class_list_filename'] = class_list_filename
511
+ coco_to_yolo_info['source_image_to_dest_image'] = source_image_to_dest_image
512
+ coco_to_yolo_info['coco_id_to_yolo_id'] = coco_id_to_yolo_id
513
+
514
+ return coco_to_yolo_info
515
+
516
+ # ...def coco_to_yolo(...)
517
+
518
+
519
def create_yolo_symlinks(source_folder,images_folder,labels_folder,
                         class_list_file=None,
                         class_list_output_name='object.data',
                         force_lowercase_image_extension=False):
    """
    Mirrors a YOLO-formatted folder (images with sibling .txt annotation files) into
    two parallel folders of symlinks: one for the images, one for the labels.  This
    supports preview/editing tools that expect images and labels in separate folders.

    Args:
        source_folder: folder that is searched recursively for images
        images_folder: folder that receives the image links; the relative path of each
            image under [source_folder] is preserved
        labels_folder: folder that receives the .txt links, using the same relative paths
        class_list_file: optional class list file; if not None, it is linked into
            [labels_folder]
        class_list_output_name: file name used for the class list link
        force_lowercase_image_extension: if True, the extension of each image link is
            lowercased (the link still points at the original file)

    :meta private:
    """

    assert source_folder != images_folder and source_folder != labels_folder

    for link_folder in (images_folder,labels_folder):
        os.makedirs(link_folder,exist_ok=True)

    relative_image_paths = find_images(source_folder,recursive=True,return_relative_paths=True)

    # relative_path = relative_image_paths[0]
    for relative_path in tqdm(relative_image_paths):

        image_source = os.path.join(source_folder,relative_path)
        image_link = os.path.join(images_folder,relative_path)

        if force_lowercase_image_extension:
            link_stem,link_extension = os.path.splitext(image_link)
            image_link = link_stem + link_extension.lower()

        os.makedirs(os.path.dirname(image_link),exist_ok=True)
        safe_create_link(image_source,image_link)

        # An image without a sibling .txt file gets an image link only
        label_source = os.path.splitext(image_source)[0] + '.txt'
        if not os.path.isfile(label_source):
            continue

        label_link = os.path.splitext(os.path.join(labels_folder,relative_path))[0] + '.txt'
        os.makedirs(os.path.dirname(label_link),exist_ok=True)
        safe_create_link(label_source,label_link)

    # ...for each image

    if class_list_file is not None:
        safe_create_link(class_list_file,
                         os.path.join(labels_folder,class_list_output_name))

# ...def create_yolo_symlinks(...)
565
+
566
+
567
+ #%% Interactive driver
568
+
569
if False:

    pass

    #%% Options

    # Input and output locations
    input_file = os.path.expanduser('~/data/md-test-coco.json')
    image_folder = os.path.expanduser('~/data/md-test')
    output_folder = os.path.expanduser('~/data/md-test-yolo')

    # Conversion settings.  Only the first four are passed to coco_to_yolo() in the
    # next cell; the rest are defined here but not forwarded by that call.
    create_image_and_label_folders = False
    class_file_name = 'classes.txt'
    allow_empty_annotations = False
    clip_boxes = False
    image_id_to_output_image_json_file = None
    images_to_exclude = None
    path_replacement_char = '#'
    category_names_to_exclude = None


    #%% Programmatic execution

    coco_to_yolo_results = coco_to_yolo(
        image_folder,output_folder,input_file,
        source_format='coco',
        overwrite_images=False,
        create_image_and_label_folders=create_image_and_label_folders,
        class_file_name=class_file_name,
        allow_empty_annotations=allow_empty_annotations,
        clip_boxes=clip_boxes)

    # Link folders live directly under the YOLO output folder
    symlink_images_folder = output_folder + '/images'
    symlink_labels_folder = output_folder + '/labels'

    create_yolo_symlinks(source_folder=output_folder,
                         images_folder=symlink_images_folder,
                         labels_folder=symlink_labels_folder,
                         class_list_file=coco_to_yolo_results['class_list_filename'],
                         class_list_output_name='object.data',
                         force_lowercase_image_extension=True)


    #%% Prepare command-line example

    command_template = 'python coco_to_yolo.py {} {} {} --create_bounding_box_editor_symlinks'
    s = command_template.format(image_folder,output_folder,input_file)
    print(s)

    # Copy the command to the clipboard for pasting into a terminal
    import clipboard
    clipboard.copy(s)
612
+
613
+
614
+ #%% Command-line driver
615
+
616
+ import sys,argparse
617
+
618
def main():
    """
    Command-line entry point for COCO-to-YOLO conversion.

    Parses three required positional arguments (input image folder, output folder,
    COCO .json file) and passes them to coco_to_yolo(), leaving every other option
    at its default.  If --create_bounding_box_editor_symlinks is supplied, also calls
    create_yolo_symlinks() to build "images" and "labels" link folders under the
    output folder.

    When invoked with no arguments at all, prints the help text and exits.
    """

    parser = argparse.ArgumentParser(
        description='Convert COCO-formatted data to YOLO format, flattening the image structure')

    # input_image_folder,output_folder,input_file

    parser.add_argument(
        'input_folder',
        type=str,
        help='Path to input images')

    parser.add_argument(
        'output_folder',
        type=str,
        help='Path to flat, YOLO-formatted dataset')

    parser.add_argument(
        'input_file',
        type=str,
        help='Path to COCO dataset file (.json)')

    parser.add_argument(
        '--create_bounding_box_editor_symlinks',
        action='store_true',
        help='Prepare symlinks so the whole folder appears to contain "images" and "labels" folders')

    # With no arguments, show the full help rather than argparse's terse
    # "missing required arguments" error.
    if len(sys.argv) <= 1:
        parser.print_help()
        parser.exit()

    args = parser.parse_args()

    coco_to_yolo_results = coco_to_yolo(args.input_folder,args.output_folder,args.input_file)

    if args.create_bounding_box_editor_symlinks:
        create_yolo_symlinks(source_folder=args.output_folder,
                             images_folder=args.output_folder + '/images',
                             labels_folder=args.output_folder + '/labels',
                             class_list_file=coco_to_yolo_results['class_list_filename'],
                             class_list_output_name='object.data',
                             force_lowercase_image_extension=True)

if __name__ == '__main__':
    main()
File without changes
@@ -0,0 +1,33 @@
1
+ """
2
+
3
+ add_width_and_height_to_db.py
4
+
5
+ Grabs width and height from actual image files for a .json database that is missing w/h.
6
+
7
+ TODO: this is a one-off script waiting to be cleaned up for more general use.
8
+
9
+ """
10
+
11
+ #%% Imports and constants
12
+
13
+ import json
14
+ from PIL import Image
15
+
16
+ datafile = '/datadrive/snapshotserengeti/databases/snapshotserengeti.json'
17
+ image_base = '/datadrive/snapshotserengeti/images/'
18
+
19
def main():
    """
    Adds missing 'width' and 'height' fields to the image records in [datafile].

    Loads the .json database at [datafile], and for every entry in data['images']
    that lacks either dimension, reads the size from the image file at
    [image_base] + im['file_name'].  The updated database is then written back
    over [datafile].
    """

    with open(datafile,'r') as f:
        data = json.load(f)

    for im in data['images']:

        # Repair records that are missing either dimension, not just 'height'
        if ('height' not in im) or ('width' not in im):

            # Close each image file explicitly, so handles don't accumulate
            # when the database contains many images.
            with Image.open(image_base + im['file_name']) as pil_image:
                im_w, im_h = pil_image.size

            im['height'] = im_h
            im['width'] = im_w

    # ...for each image

    # Use a context manager so the output is flushed and closed before we return
    with open(datafile,'w') as f:
        json.dump(data, f)

if __name__ == '__main__':
    main()