megadetector 5.0.28__py3-none-any.whl → 10.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (197) hide show
  1. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
  2. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
  3. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
  4. megadetector/classification/aggregate_classifier_probs.py +3 -3
  5. megadetector/classification/analyze_failed_images.py +5 -5
  6. megadetector/classification/cache_batchapi_outputs.py +5 -5
  7. megadetector/classification/create_classification_dataset.py +11 -12
  8. megadetector/classification/crop_detections.py +10 -10
  9. megadetector/classification/csv_to_json.py +8 -8
  10. megadetector/classification/detect_and_crop.py +13 -15
  11. megadetector/classification/efficientnet/model.py +8 -8
  12. megadetector/classification/efficientnet/utils.py +6 -5
  13. megadetector/classification/evaluate_model.py +7 -7
  14. megadetector/classification/identify_mislabeled_candidates.py +6 -6
  15. megadetector/classification/json_to_azcopy_list.py +1 -1
  16. megadetector/classification/json_validator.py +29 -32
  17. megadetector/classification/map_classification_categories.py +9 -9
  18. megadetector/classification/merge_classification_detection_output.py +12 -9
  19. megadetector/classification/prepare_classification_script.py +19 -19
  20. megadetector/classification/prepare_classification_script_mc.py +26 -26
  21. megadetector/classification/run_classifier.py +4 -4
  22. megadetector/classification/save_mislabeled.py +6 -6
  23. megadetector/classification/train_classifier.py +1 -1
  24. megadetector/classification/train_classifier_tf.py +9 -9
  25. megadetector/classification/train_utils.py +10 -10
  26. megadetector/data_management/annotations/annotation_constants.py +1 -2
  27. megadetector/data_management/camtrap_dp_to_coco.py +79 -46
  28. megadetector/data_management/cct_json_utils.py +103 -103
  29. megadetector/data_management/cct_to_md.py +49 -49
  30. megadetector/data_management/cct_to_wi.py +33 -33
  31. megadetector/data_management/coco_to_labelme.py +75 -75
  32. megadetector/data_management/coco_to_yolo.py +210 -193
  33. megadetector/data_management/databases/add_width_and_height_to_db.py +86 -12
  34. megadetector/data_management/databases/combine_coco_camera_traps_files.py +40 -40
  35. megadetector/data_management/databases/integrity_check_json_db.py +228 -200
  36. megadetector/data_management/databases/subset_json_db.py +33 -33
  37. megadetector/data_management/generate_crops_from_cct.py +88 -39
  38. megadetector/data_management/get_image_sizes.py +54 -49
  39. megadetector/data_management/labelme_to_coco.py +133 -125
  40. megadetector/data_management/labelme_to_yolo.py +159 -73
  41. megadetector/data_management/lila/create_lila_blank_set.py +81 -83
  42. megadetector/data_management/lila/create_lila_test_set.py +32 -31
  43. megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
  44. megadetector/data_management/lila/download_lila_subset.py +21 -24
  45. megadetector/data_management/lila/generate_lila_per_image_labels.py +365 -107
  46. megadetector/data_management/lila/get_lila_annotation_counts.py +35 -33
  47. megadetector/data_management/lila/get_lila_image_counts.py +22 -22
  48. megadetector/data_management/lila/lila_common.py +73 -70
  49. megadetector/data_management/lila/test_lila_metadata_urls.py +28 -19
  50. megadetector/data_management/mewc_to_md.py +344 -340
  51. megadetector/data_management/ocr_tools.py +262 -255
  52. megadetector/data_management/read_exif.py +249 -227
  53. megadetector/data_management/remap_coco_categories.py +90 -28
  54. megadetector/data_management/remove_exif.py +81 -21
  55. megadetector/data_management/rename_images.py +187 -187
  56. megadetector/data_management/resize_coco_dataset.py +588 -120
  57. megadetector/data_management/speciesnet_to_md.py +41 -41
  58. megadetector/data_management/wi_download_csv_to_coco.py +55 -55
  59. megadetector/data_management/yolo_output_to_md_output.py +248 -122
  60. megadetector/data_management/yolo_to_coco.py +333 -191
  61. megadetector/detection/change_detection.py +832 -0
  62. megadetector/detection/process_video.py +340 -337
  63. megadetector/detection/pytorch_detector.py +358 -278
  64. megadetector/detection/run_detector.py +399 -186
  65. megadetector/detection/run_detector_batch.py +404 -377
  66. megadetector/detection/run_inference_with_yolov5_val.py +340 -327
  67. megadetector/detection/run_tiled_inference.py +257 -249
  68. megadetector/detection/tf_detector.py +24 -24
  69. megadetector/detection/video_utils.py +332 -295
  70. megadetector/postprocessing/add_max_conf.py +19 -11
  71. megadetector/postprocessing/categorize_detections_by_size.py +45 -45
  72. megadetector/postprocessing/classification_postprocessing.py +468 -433
  73. megadetector/postprocessing/combine_batch_outputs.py +23 -23
  74. megadetector/postprocessing/compare_batch_results.py +590 -525
  75. megadetector/postprocessing/convert_output_format.py +106 -102
  76. megadetector/postprocessing/create_crop_folder.py +347 -147
  77. megadetector/postprocessing/detector_calibration.py +173 -168
  78. megadetector/postprocessing/generate_csv_report.py +508 -499
  79. megadetector/postprocessing/load_api_results.py +48 -27
  80. megadetector/postprocessing/md_to_coco.py +133 -102
  81. megadetector/postprocessing/md_to_labelme.py +107 -90
  82. megadetector/postprocessing/md_to_wi.py +40 -40
  83. megadetector/postprocessing/merge_detections.py +92 -114
  84. megadetector/postprocessing/postprocess_batch_results.py +319 -301
  85. megadetector/postprocessing/remap_detection_categories.py +91 -38
  86. megadetector/postprocessing/render_detection_confusion_matrix.py +214 -205
  87. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
  88. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
  89. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +704 -679
  90. megadetector/postprocessing/separate_detections_into_folders.py +226 -211
  91. megadetector/postprocessing/subset_json_detector_output.py +265 -262
  92. megadetector/postprocessing/top_folders_to_bottom.py +45 -45
  93. megadetector/postprocessing/validate_batch_results.py +70 -70
  94. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
  95. megadetector/taxonomy_mapping/map_new_lila_datasets.py +18 -19
  96. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +54 -33
  97. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +67 -67
  98. megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
  99. megadetector/taxonomy_mapping/simple_image_download.py +8 -8
  100. megadetector/taxonomy_mapping/species_lookup.py +156 -74
  101. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
  102. megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
  103. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
  104. megadetector/utils/ct_utils.py +1049 -211
  105. megadetector/utils/directory_listing.py +21 -77
  106. megadetector/utils/gpu_test.py +22 -22
  107. megadetector/utils/md_tests.py +632 -529
  108. megadetector/utils/path_utils.py +1520 -431
  109. megadetector/utils/process_utils.py +41 -41
  110. megadetector/utils/split_locations_into_train_val.py +62 -62
  111. megadetector/utils/string_utils.py +148 -27
  112. megadetector/utils/url_utils.py +489 -176
  113. megadetector/utils/wi_utils.py +2658 -2526
  114. megadetector/utils/write_html_image_list.py +137 -137
  115. megadetector/visualization/plot_utils.py +34 -30
  116. megadetector/visualization/render_images_with_thumbnails.py +39 -74
  117. megadetector/visualization/visualization_utils.py +487 -435
  118. megadetector/visualization/visualize_db.py +232 -198
  119. megadetector/visualization/visualize_detector_output.py +82 -76
  120. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/METADATA +5 -2
  121. megadetector-10.0.0.dist-info/RECORD +139 -0
  122. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/WHEEL +1 -1
  123. megadetector/api/batch_processing/api_core/__init__.py +0 -0
  124. megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
  125. megadetector/api/batch_processing/api_core/batch_service/score.py +0 -439
  126. megadetector/api/batch_processing/api_core/server.py +0 -294
  127. megadetector/api/batch_processing/api_core/server_api_config.py +0 -97
  128. megadetector/api/batch_processing/api_core/server_app_config.py +0 -55
  129. megadetector/api/batch_processing/api_core/server_batch_job_manager.py +0 -220
  130. megadetector/api/batch_processing/api_core/server_job_status_table.py +0 -149
  131. megadetector/api/batch_processing/api_core/server_orchestration.py +0 -360
  132. megadetector/api/batch_processing/api_core/server_utils.py +0 -88
  133. megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
  134. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
  135. megadetector/api/batch_processing/api_support/__init__.py +0 -0
  136. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +0 -152
  137. megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
  138. megadetector/api/synchronous/__init__.py +0 -0
  139. megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  140. megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +0 -151
  141. megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -263
  142. megadetector/api/synchronous/api_core/animal_detection_api/config.py +0 -35
  143. megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
  144. megadetector/api/synchronous/api_core/tests/load_test.py +0 -110
  145. megadetector/data_management/importers/add_nacti_sizes.py +0 -52
  146. megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
  147. megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
  148. megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
  149. megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
  150. megadetector/data_management/importers/awc_to_json.py +0 -191
  151. megadetector/data_management/importers/bellevue_to_json.py +0 -272
  152. megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
  153. megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
  154. megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
  155. megadetector/data_management/importers/cct_field_adjustments.py +0 -58
  156. megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
  157. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  158. megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
  159. megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
  160. megadetector/data_management/importers/ena24_to_json.py +0 -276
  161. megadetector/data_management/importers/filenames_to_json.py +0 -386
  162. megadetector/data_management/importers/helena_to_cct.py +0 -283
  163. megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
  164. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  165. megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
  166. megadetector/data_management/importers/jb_csv_to_json.py +0 -150
  167. megadetector/data_management/importers/mcgill_to_json.py +0 -250
  168. megadetector/data_management/importers/missouri_to_json.py +0 -490
  169. megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
  170. megadetector/data_management/importers/noaa_seals_2019.py +0 -181
  171. megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
  172. megadetector/data_management/importers/pc_to_json.py +0 -365
  173. megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
  174. megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
  175. megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
  176. megadetector/data_management/importers/rspb_to_json.py +0 -356
  177. megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
  178. megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
  179. megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
  180. megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
  181. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  182. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  183. megadetector/data_management/importers/sulross_get_exif.py +0 -65
  184. megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
  185. megadetector/data_management/importers/ubc_to_json.py +0 -399
  186. megadetector/data_management/importers/umn_to_json.py +0 -507
  187. megadetector/data_management/importers/wellington_to_json.py +0 -263
  188. megadetector/data_management/importers/wi_to_json.py +0 -442
  189. megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
  190. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
  191. megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
  192. megadetector/utils/azure_utils.py +0 -178
  193. megadetector/utils/sas_blob_utils.py +0 -509
  194. megadetector-5.0.28.dist-info/RECORD +0 -209
  195. /megadetector/{api/batch_processing/__init__.py → __init__.py} +0 -0
  196. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/licenses/LICENSE +0 -0
  197. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/top_level.txt +0 -0
@@ -8,7 +8,7 @@ the dataset (to a single folder) in the process.
8
8
  If the input and output folders are the same, writes .txt files to the input folder,
9
9
  and neither moves nor modifies images.
10
10
 
11
- Currently ignores segmentation masks, and errors if an annotation has a
11
+ Currently ignores segmentation masks, and errors if an annotation has a
12
12
  segmentation polygon but no bbox.
13
13
 
14
14
  Has only been tested on a handful of COCO Camera Traps data sets; if you
@@ -21,6 +21,8 @@ use it for more general COCO conversion, YMMV.
21
21
  import json
22
22
  import os
23
23
  import shutil
24
+ import sys
25
+ import argparse
24
26
 
25
27
  from collections import defaultdict
26
28
  from tqdm import tqdm
@@ -38,34 +40,40 @@ def write_yolo_dataset_file(yolo_dataset_file,
38
40
  test_folder_relative=None):
39
41
  """
40
42
  Write a YOLOv5 dataset.yaml file to the absolute path [yolo_dataset_file] (should
41
- have a .yaml extension, though it's only a warning if it doesn't).
42
-
43
+ have a .yaml extension, though it's only a warning if it doesn't).
44
+
43
45
  Args:
44
- yolo_dataset_file (str): the file, typically ending in .yaml or .yml, to write.
46
+ yolo_dataset_file (str): the file, typically ending in .yaml or .yml, to write.
45
47
  Does not have to be within dataset_base_dir.
46
48
  dataset_base_dir (str): the absolute base path of the YOLO dataset
47
- class_list (list or str): an ordered list of class names (the first item will be class 0,
48
- etc.), or the name of a text file containing an ordered list of class names (one per
49
+ class_list (list or str): an ordered list of class names (the first item will be class 0,
50
+ etc.), or the name of a text file containing an ordered list of class names (one per
49
51
  line, starting from class zero).
50
- train_folder_relative (str, optional): train folder name, used only to populate dataset.yaml
51
- val_folder_relative (str, optional): val folder name, used only to populate dataset.yaml
52
- test_folder_relative (str, optional): test folder name, used only to populate dataset.yaml
52
+ train_folder_relative (str, optional): train folder name, used only to
53
+ populate dataset.yaml. Can also be a filename (e.g. a .txt file with image
54
+ files).
55
+ val_folder_relative (str, optional): val folder name, used only to
56
+ populate dataset.yaml. Can also be a filename (e.g. a .txt file with image
57
+ files).
58
+ test_folder_relative (str, optional): test folder name, used only to
59
+ populate dataset.yaml. Can also be a filename (e.g. a .txt file with image
60
+ files).
53
61
  """
54
-
62
+
55
63
  # Read class names
56
64
  if isinstance(class_list,str):
57
65
  with open(class_list,'r') as f:
58
66
  class_lines = f.readlines()
59
- class_lines = [s.strip() for s in class_lines]
67
+ class_lines = [s.strip() for s in class_lines]
60
68
  class_list = [s for s in class_lines if len(s) > 0]
61
69
 
62
70
  if not (yolo_dataset_file.endswith('.yml') or yolo_dataset_file.endswith('.yaml')):
63
71
  print('Warning: writing dataset file to a non-yml/yaml extension:\n{}'.format(
64
72
  yolo_dataset_file))
65
-
73
+
66
74
  # Write dataset.yaml
67
75
  with open(yolo_dataset_file,'w') as f:
68
-
76
+
69
77
  f.write('# Train/val sets\n')
70
78
  f.write('path: {}\n'.format(dataset_base_dir))
71
79
  if train_folder_relative is not None:
@@ -74,9 +82,9 @@ def write_yolo_dataset_file(yolo_dataset_file,
74
82
  f.write('val: {}\n'.format(val_folder_relative))
75
83
  if test_folder_relative is not None:
76
84
  f.write('val: {}\n'.format(test_folder_relative))
77
-
85
+
78
86
  f.write('\n')
79
-
87
+
80
88
  f.write('# Classes\n')
81
89
  f.write('names:\n')
82
90
  for i_class,class_name in enumerate(class_list):
@@ -84,7 +92,7 @@ def write_yolo_dataset_file(yolo_dataset_file,
84
92
 
85
93
  # ...def write_yolo_dataset_file(...)
86
94
 
87
-
95
+
88
96
  def coco_to_yolo(input_image_folder,
89
97
  output_folder,
90
98
  input_file,
@@ -102,15 +110,15 @@ def coco_to_yolo(input_image_folder,
102
110
  write_output=True,
103
111
  flatten_paths=False):
104
112
  """
105
- Converts a COCO-formatted dataset to a YOLO-formatted dataset, optionally flattening the
113
+ Converts a COCO-formatted dataset to a YOLO-formatted dataset, optionally flattening the
106
114
  dataset to a single folder in the process.
107
-
115
+
108
116
  If the input and output folders are the same, writes .txt files to the input folder,
109
117
  and neither moves nor modifies images.
110
-
111
- Currently ignores segmentation masks, and errors if an annotation has a
118
+
119
+ Currently ignores segmentation masks, and errors if an annotation has a
112
120
  segmentation polygon but no bbox.
113
-
121
+
114
122
  Args:
115
123
  input_image_folder (str): the folder where images live; filenames in the COCO .json
116
124
  file [input_file] should be relative to this folder
@@ -119,88 +127,88 @@ def coco_to_yolo(input_image_folder,
119
127
  images are left alone.
120
128
  source_format (str, optional): can be 'coco' (default) or 'coco_camera_traps'. The only difference
121
129
  is that when source_format is 'coco_camera_traps', we treat an image with a non-bbox
122
- annotation as a special case, i.e. that's how an empty image is indicated. The original
123
- COCO standard is a little ambiguous on this issue. If source_format is 'coco', we
130
+ annotation as a special case, i.e. that's how an empty image is indicated. The original
131
+ COCO standard is a little ambiguous on this issue. If source_format is 'coco', we
124
132
  either treat images as empty or error, depending on the value of [allow_empty_annotations].
125
133
  [allow_empty_annotations] has no effect if source_format is 'coco_camera_traps'.
126
134
  overwrite_images (bool, optional): over-write images in the output folder if they exist
127
- create_image_and_label_folder (bool, optional): whether to create separate folders called 'images' and
128
- 'labels' in the YOLO output folder. If create_image_and_label_folders is False,
129
- a/b/c/image001.jpg will become a#b#c#image001.jpg, and the corresponding text file will
130
- be a#b#c#image001.txt. If create_image_and_label_folders is True, a/b/c/image001.jpg will become
131
- images/a#b#c#image001.jpg, and the corresponding text file will be
132
- labels/a#b#c#image001.txt.
133
- class_file_name (str, optional): .txt file (relative to the output folder) that we should
135
+ create_image_and_label_folders (bool, optional): whether to create separate folders called 'images' and
136
+ 'labels' in the YOLO output folder. If create_image_and_label_folders is False,
137
+ a/b/c/image001.jpg will become a#b#c#image001.jpg, and the corresponding text file will
138
+ be a#b#c#image001.txt. If create_image_and_label_folders is True, a/b/c/image001.jpg will become
139
+ images/a#b#c#image001.jpg, and the corresponding text file will be
140
+ labels/a#b#c#image001.txt.
141
+ class_file_name (str, optional): .txt file (relative to the output folder) that we should
134
142
  populate with a list of classes (or None to omit)
135
- allow_empty_annotations (bool, optional): if this is False and [source_format] is 'coco',
143
+ allow_empty_annotations (bool, optional): if this is False and [source_format] is 'coco',
136
144
  we'll error on annotations that have no 'bbox' field
137
145
  clip_boxes (bool, optional): whether to clip bounding box coordinates to the range [0,1] before
138
146
  converting to YOLO xywh format
139
147
  image_id_to_output_image_json_file (str, optional): an optional *output* file, to which we will write
140
148
  a mapping from image IDs to output file names
141
- images_to_exclude (list, optional): a list of image files (relative paths in the input folder) that we
149
+ images_to_exclude (list, optional): a list of image files (relative paths in the input folder) that we
142
150
  should ignore
143
151
  path_replacement_char (str, optional): only relevant if [flatten_paths] is True; this is used to replace
144
152
  path separators, e.g. if [path_replacement_char] is '#' and [flatten_paths] is True, a/b/c/d.jpg
145
153
  becomes a#b#c#d.jpg
146
154
  category_names_to_exclude (str, optional): category names that should not be represented in the
147
155
  YOLO output; only impacts annotations, does not prevent copying images. There's almost no reason
148
- you would want to specify this and [category_names_to_include].
149
- category_names_to_include (str, optional): allow-list of category names that should be represented
150
- in the YOLO output; only impacts annotations, does not prevent copying images. There's almost
151
- no reason you would want to specify this and [category_names_to_exclude].
156
+ you would want to specify this and [category_names_to_include].
157
+ category_names_to_include (str, optional): allow-list of category names that should be represented
158
+ in the YOLO output; only impacts annotations, does not prevent copying images. There's almost
159
+ no reason you would want to specify this and [category_names_to_exclude].
152
160
  write_output (bool, optional): determines whether we actually copy images and write annotations;
153
161
  setting this to False mostly puts this function in "dry run" mode. The class list
154
162
  file is written regardless of the value of write_output.
155
163
  flatten_paths (bool, optional): replace /'s in image filenames with [path_replacement_char],
156
164
  which ensures that the output folder is a single flat folder.
157
-
165
+
158
166
  Returns:
159
167
  dict: information about the coco --> yolo mapping, containing at least the fields:
160
-
161
- - class_list_filename: the filename to which we wrote the flat list of class names required
168
+
169
+ - class_list_filename: the filename to which we wrote the flat list of class names required
162
170
  by the YOLO format.
163
171
  - source_image_to_dest_image: a dict mapping source images to destination images
164
- - coco_id_to_yolo_id: a dict mapping COCO category IDs to YOLO category IDs
172
+ - coco_id_to_yolo_id: a dict mapping COCO category IDs to YOLO category IDs
165
173
  """
166
-
174
+
167
175
  ## Validate input
168
-
176
+
169
177
  if category_names_to_include is not None and category_names_to_exclude is not None:
170
178
  raise ValueError('category_names_to_include and category_names_to_exclude are mutually exclusive')
171
-
179
+
172
180
  if output_folder is None:
173
181
  output_folder = input_image_folder
174
-
182
+
175
183
  if images_to_exclude is not None:
176
184
  images_to_exclude = set(images_to_exclude)
177
-
185
+
178
186
  if category_names_to_exclude is None:
179
187
  category_names_to_exclude = {}
180
-
188
+
181
189
  assert os.path.isdir(input_image_folder)
182
190
  assert os.path.isfile(input_file)
183
191
  os.makedirs(output_folder,exist_ok=True)
184
-
192
+
185
193
  if (output_folder == input_image_folder) and (overwrite_images) and \
186
194
  (not create_image_and_label_folders) and (not flatten_paths):
187
195
  print('Warning: output folder and input folder are the same, disabling overwrite_images')
188
196
  overwrite_images = False
189
-
197
+
190
198
  ## Read input data
191
-
199
+
192
200
  with open(input_file,'r') as f:
193
201
  data = json.load(f)
194
-
195
-
202
+
203
+
196
204
  ## Parse annotations
197
-
205
+
198
206
  image_id_to_annotations = defaultdict(list)
199
-
207
+
200
208
  # i_ann = 0; ann = data['annotations'][0]
201
209
  for i_ann,ann in enumerate(data['annotations']):
202
-
203
- # Make sure no annotations have *only* segmentation data
210
+
211
+ # Make sure no annotations have *only* segmentation data
204
212
  if ( \
205
213
  ('segmentation' in ann.keys()) and \
206
214
  (ann['segmentation'] is not None) and \
@@ -209,21 +217,21 @@ def coco_to_yolo(input_image_folder,
209
217
  (('bbox' not in ann.keys()) or (ann['bbox'] is None) or (len(ann['bbox'])==0)):
210
218
  raise ValueError('Oops: segmentation data present without bbox information, ' + \
211
219
  'this script isn\'t ready for this dataset')
212
-
220
+
213
221
  image_id_to_annotations[ann['image_id']].append(ann)
214
-
222
+
215
223
  print('Parsed annotations for {} images'.format(len(image_id_to_annotations)))
216
-
224
+
217
225
  # Re-map class IDs to make sure they run from 0...n-classes-1
218
226
  #
219
227
  # Note: this allows unused categories in the output data set. This is OK for
220
228
  # some training pipelines, not for others.
221
229
  next_category_id = 0
222
- coco_id_to_yolo_id = {}
230
+ coco_id_to_yolo_id = {}
223
231
  coco_id_to_name = {}
224
232
  yolo_id_to_name = {}
225
233
  coco_category_ids_to_exclude = set()
226
-
234
+
227
235
  for category in data['categories']:
228
236
  coco_id_to_name[category['id']] = category['name']
229
237
  if (category_names_to_include is not None) and \
@@ -232,86 +240,86 @@ def coco_to_yolo(input_image_folder,
232
240
  continue
233
241
  elif (category['name'] in category_names_to_exclude):
234
242
  coco_category_ids_to_exclude.add(category['id'])
235
- continue
243
+ continue
236
244
  assert category['id'] not in coco_id_to_yolo_id
237
245
  coco_id_to_yolo_id[category['id']] = next_category_id
238
246
  yolo_id_to_name[next_category_id] = category['name']
239
247
  next_category_id += 1
240
-
241
-
248
+
249
+
242
250
  ## Process images (everything but I/O)
243
-
251
+
244
252
  # List of dictionaries with keys 'source_image','dest_image','bboxes','dest_txt'
245
253
  images_to_copy = []
246
-
254
+
247
255
  missing_images = []
248
256
  excluded_images = []
249
-
257
+
250
258
  image_names = set()
251
-
259
+
252
260
  typical_image_extensions = set(['.jpg','.jpeg','.png','.gif','.tif','.bmp'])
253
-
261
+
254
262
  printed_empty_annotation_warning = False
255
-
263
+
256
264
  image_id_to_output_image_name = {}
257
-
265
+
258
266
  print('Processing annotations')
259
-
267
+
260
268
  n_clipped_boxes = 0
261
269
  n_total_boxes = 0
262
-
270
+
263
271
  # i_image = 0; im = data['images'][i_image]
264
272
  for i_image,im in tqdm(enumerate(data['images']),total=len(data['images'])):
265
-
273
+
266
274
  output_info = {}
267
- source_image = os.path.join(input_image_folder,im['file_name'])
275
+ source_image = os.path.join(input_image_folder,im['file_name'])
268
276
  output_info['source_image'] = source_image
269
-
277
+
270
278
  if images_to_exclude is not None and im['file_name'] in images_to_exclude:
271
279
  excluded_images.append(im['file_name'])
272
280
  continue
273
-
281
+
274
282
  tokens = os.path.splitext(im['file_name'])
275
283
  if tokens[1].lower() not in typical_image_extensions:
276
284
  print('Warning: unusual image file name {}'.format(im['file_name']))
277
-
285
+
278
286
  if flatten_paths:
279
287
  image_name = tokens[0].replace('\\','/').replace('/',path_replacement_char) + \
280
- '_' + str(i_image).zfill(6)
288
+ '_' + str(i_image).zfill(6)
281
289
  else:
282
290
  image_name = tokens[0]
283
291
 
284
292
  assert image_name not in image_names, 'Image name collision for {}'.format(image_name)
285
293
  image_names.add(image_name)
286
-
294
+
287
295
  assert im['id'] not in image_id_to_output_image_name
288
296
  image_id_to_output_image_name[im['id']] = image_name
289
-
297
+
290
298
  dest_image_relative = image_name + tokens[1]
291
299
  output_info['dest_image_relative'] = dest_image_relative
292
300
  dest_txt_relative = image_name + '.txt'
293
301
  output_info['dest_txt_relative'] = dest_txt_relative
294
302
  output_info['bboxes'] = []
295
-
303
+
296
304
  # assert os.path.isfile(source_image), 'Could not find image {}'.format(source_image)
297
305
  if not os.path.isfile(source_image):
298
306
  print('Warning: could not find image {}'.format(source_image))
299
307
  missing_images.append(im['file_name'])
300
308
  continue
301
-
309
+
302
310
  image_id = im['id']
303
-
311
+
304
312
  image_bboxes = []
305
-
313
+
306
314
  if image_id in image_id_to_annotations:
307
-
315
+
308
316
  for ann in image_id_to_annotations[image_id]:
309
-
317
+
310
318
  # If this annotation has no bounding boxes...
311
319
  if 'bbox' not in ann or ann['bbox'] is None or len(ann['bbox']) == 0:
312
-
320
+
313
321
  if source_format == 'coco':
314
-
322
+
315
323
  if not allow_empty_annotations:
316
324
  # This is not entirely clear from the COCO spec, but it seems to be consensus
317
325
  # that if you want to specify an image with no objects, you don't include any
@@ -319,11 +327,11 @@ def coco_to_yolo(input_image_folder,
319
327
  raise ValueError('If an annotation exists, it should have content')
320
328
  else:
321
329
  continue
322
-
330
+
323
331
  elif source_format == 'coco_camera_traps':
324
-
325
- # We allow empty bbox lists in COCO camera traps files; this is typically a
326
- # negative example in a dataset that has bounding boxes, and 0 is typically
332
+
333
+ # We allow empty bbox lists in COCO camera traps files; this is typically a
334
+ # negative example in a dataset that has bounding boxes, and 0 is typically
327
335
  # the empty category.
328
336
  if ann['category_id'] != 0:
329
337
  if not printed_empty_annotation_warning:
@@ -331,113 +339,113 @@ def coco_to_yolo(input_image_folder,
331
339
  print('Warning: non-bbox annotation found with category {}'.format(
332
340
  ann['category_id']))
333
341
  continue
334
-
342
+
335
343
  else:
336
-
344
+
337
345
  raise ValueError('Unrecognized COCO variant: {}'.format(source_format))
338
-
346
+
339
347
  # ...if this is an empty annotation
340
-
348
+
341
349
  coco_bbox = ann['bbox']
342
-
350
+
343
351
  # This category isn't in our category list. This typically corresponds to whole sets
344
352
  # of images that were excluded from the YOLO set.
345
353
  if ann['category_id'] in coco_category_ids_to_exclude:
346
354
  continue
347
-
355
+
348
356
  yolo_category_id = coco_id_to_yolo_id[ann['category_id']]
349
-
357
+
350
358
  # COCO: [x_min, y_min, width, height] in absolute coordinates
351
359
  # YOLO: [class, x_center, y_center, width, height] in normalized coordinates
352
-
360
+
353
361
  # Convert from COCO coordinates to YOLO coordinates
354
362
  img_w = im['width']
355
363
  img_h = im['height']
356
-
364
+
357
365
  if source_format in ('coco','coco_camera_traps'):
358
-
366
+
359
367
  x_min_absolute = coco_bbox[0]
360
368
  y_min_absolute = coco_bbox[1]
361
369
  box_w_absolute = coco_bbox[2]
362
370
  box_h_absolute = coco_bbox[3]
363
-
371
+
364
372
  x_center_absolute = (x_min_absolute + (x_min_absolute + box_w_absolute)) / 2
365
373
  y_center_absolute = (y_min_absolute + (y_min_absolute + box_h_absolute)) / 2
366
-
374
+
367
375
  x_center_relative = x_center_absolute / img_w
368
376
  y_center_relative = y_center_absolute / img_h
369
-
377
+
370
378
  box_w_relative = box_w_absolute / img_w
371
379
  box_h_relative = box_h_absolute / img_h
372
-
380
+
373
381
  else:
374
-
382
+
375
383
  raise ValueError('Unrecognized source format {}'.format(source_format))
376
-
384
+
377
385
  if clip_boxes:
378
-
386
+
379
387
  clipped_box = False
380
-
381
- box_right = x_center_relative + (box_w_relative / 2.0)
388
+
389
+ box_right = x_center_relative + (box_w_relative / 2.0)
382
390
  if box_right > 1.0:
383
391
  clipped_box = True
384
392
  overhang = box_right - 1.0
385
393
  box_w_relative -= overhang
386
394
  x_center_relative -= (overhang / 2.0)
387
395
 
388
- box_bottom = y_center_relative + (box_h_relative / 2.0)
396
+ box_bottom = y_center_relative + (box_h_relative / 2.0)
389
397
  if box_bottom > 1.0:
390
398
  clipped_box = True
391
399
  overhang = box_bottom - 1.0
392
400
  box_h_relative -= overhang
393
401
  y_center_relative -= (overhang / 2.0)
394
-
402
+
395
403
  box_left = x_center_relative - (box_w_relative / 2.0)
396
404
  if box_left < 0.0:
397
405
  clipped_box = True
398
406
  overhang = abs(box_left)
399
407
  box_w_relative -= overhang
400
408
  x_center_relative += (overhang / 2.0)
401
-
409
+
402
410
  box_top = y_center_relative - (box_h_relative / 2.0)
403
411
  if box_top < 0.0:
404
412
  clipped_box = True
405
413
  overhang = abs(box_top)
406
414
  box_h_relative -= overhang
407
415
  y_center_relative += (overhang / 2.0)
408
-
416
+
409
417
  if clipped_box:
410
418
  n_clipped_boxes += 1
411
-
419
+
412
420
  yolo_box = [yolo_category_id,
413
- x_center_relative, y_center_relative,
421
+ x_center_relative, y_center_relative,
414
422
  box_w_relative, box_h_relative]
415
-
423
+
416
424
  image_bboxes.append(yolo_box)
417
425
  n_total_boxes += 1
418
-
419
- # ...for each annotation
420
-
426
+
427
+ # ...for each annotation
428
+
421
429
  # ...if this image has annotations
422
-
430
+
423
431
  output_info['bboxes'] = image_bboxes
424
-
425
- images_to_copy.append(output_info)
426
-
432
+
433
+ images_to_copy.append(output_info)
434
+
427
435
  # ...for each image
428
-
436
+
429
437
  print('\nWriting {} boxes ({} clipped) for {} images'.format(n_total_boxes,
430
438
  n_clipped_boxes,len(images_to_copy)))
431
439
  print('{} missing images (of {})'.format(len(missing_images),len(data['images'])))
432
-
440
+
433
441
  if images_to_exclude is not None:
434
442
  print('{} excluded images (of {})'.format(len(excluded_images),len(data['images'])))
435
-
436
-
443
+
444
+
437
445
  ## Write output
438
-
446
+
439
447
  print('Generating class list')
440
-
448
+
441
449
  if class_file_name is not None:
442
450
  class_list_filename = os.path.join(output_folder,class_file_name)
443
451
  with open(class_list_filename, 'w') as f:
@@ -446,12 +454,12 @@ def coco_to_yolo(input_image_folder,
446
454
  # Category IDs should range from 0..N-1
447
455
  assert i_class in yolo_id_to_name
448
456
  f.write(yolo_id_to_name[i_class] + '\n')
449
-
457
+
450
458
  if image_id_to_output_image_json_file is not None:
451
459
  print('Writing image ID mapping to {}'.format(image_id_to_output_image_json_file))
452
460
  with open(image_id_to_output_image_json_file,'w') as f:
453
461
  json.dump(image_id_to_output_image_name,f,indent=1)
454
-
462
+
455
463
 
456
464
  if (output_folder == input_image_folder) and (not create_image_and_label_folders):
457
465
  print('Creating annotation files (not copying images, input and output folder are the same)')
@@ -464,12 +472,12 @@ def coco_to_yolo(input_image_folder,
464
472
  else:
465
473
  dest_image_folder = output_folder
466
474
  dest_txt_folder = output_folder
467
-
475
+
468
476
  source_image_to_dest_image = {}
469
-
477
+
470
478
  label_files_written = []
471
479
  n_boxes_written = 0
472
-
480
+
473
481
  # TODO: parallelize this loop
474
482
  #
475
483
  # output_info = images_to_copy[0]
@@ -478,96 +486,107 @@ def coco_to_yolo(input_image_folder,
478
486
  source_image = output_info['source_image']
479
487
  dest_image_relative = output_info['dest_image_relative']
480
488
  dest_txt_relative = output_info['dest_txt_relative']
481
-
489
+
482
490
  dest_image = os.path.join(dest_image_folder,dest_image_relative)
483
491
  dest_txt = os.path.join(dest_txt_folder,dest_txt_relative)
484
-
492
+
485
493
  source_image_to_dest_image[source_image] = dest_image
486
-
494
+
487
495
  # Copy the image if necessary
488
496
  if write_output:
489
-
490
- os.makedirs(os.path.dirname(dest_image),exist_ok=True)
497
+
498
+ os.makedirs(os.path.dirname(dest_image),exist_ok=True)
491
499
  os.makedirs(os.path.dirname(dest_txt),exist_ok=True)
492
-
500
+
493
501
  if not create_image_and_label_folders:
494
502
  assert os.path.dirname(dest_image) == os.path.dirname(dest_txt)
495
-
503
+
496
504
  if (not os.path.isfile(dest_image)) or (overwrite_images):
497
505
  shutil.copyfile(source_image,dest_image)
498
-
499
- bboxes = output_info['bboxes']
500
-
506
+
507
+ bboxes = output_info['bboxes']
508
+
501
509
  # Write the annotation file if necessary
502
510
  #
503
- # Only write an annotation file if there are bounding boxes. Images with
511
+ # Only write an annotation file if there are bounding boxes. Images with
504
512
  # no .txt files are treated as hard negatives, at least by YOLOv5:
505
513
  #
506
514
  # https://github.com/ultralytics/yolov5/issues/3218
507
515
  #
508
- # I think this is also true for images with empty .txt files, but
509
- # I'm using the convention suggested on that issue, i.e. hard
516
+ # I think this is also true for images with empty .txt files, but
517
+ # I'm using the convention suggested on that issue, i.e. hard
510
518
  # negatives are expressed as images without .txt files.
511
519
  if len(bboxes) > 0:
512
-
520
+
513
521
  n_boxes_written += len(bboxes)
514
522
  label_files_written.append(dest_txt)
515
-
523
+
516
524
  if write_output:
517
-
525
+
518
526
  with open(dest_txt,'w') as f:
519
-
527
+
520
528
  # bbox = bboxes[0]
521
529
  for bbox in bboxes:
522
530
  assert len(bbox) == 5
523
531
  s = '{} {} {} {} {}'.format(bbox[0],bbox[1],bbox[2],bbox[3],bbox[4])
524
532
  f.write(s + '\n')
525
-
526
-
533
+
534
+
527
535
  # ...for each image
528
-
536
+
529
537
  coco_to_yolo_info = {}
530
538
  coco_to_yolo_info['class_list_filename'] = class_list_filename
531
539
  coco_to_yolo_info['source_image_to_dest_image'] = source_image_to_dest_image
532
540
  coco_to_yolo_info['coco_id_to_yolo_id'] = coco_id_to_yolo_id
533
541
  coco_to_yolo_info['label_files_written'] = label_files_written
534
542
  coco_to_yolo_info['n_boxes_written'] = n_boxes_written
535
-
543
+
536
544
  return coco_to_yolo_info
537
545
 
538
546
  # ...def coco_to_yolo(...)
539
547
 
540
548
 
541
- def create_yolo_symlinks(source_folder,images_folder,labels_folder,
549
+ def create_yolo_symlinks(source_folder,
550
+ images_folder,
551
+ labels_folder,
542
552
  class_list_file=None,
543
553
  class_list_output_name='object.data',
544
554
  force_lowercase_image_extension=False):
545
555
  """
546
556
  Given a YOLO-formatted folder of images and .txt files, creates a folder
547
- of symlinks to all the images, and a folder of symlinks to all the labels.
548
- Used to support preview/editing tools that assume images and labels are in separate
557
+ of symlinks to all the images, and a folder of symlinks to all the labels.
558
+ Used to support preview/editing tools that assume images and labels are in separate
549
559
  folders.
550
-
560
+
561
+ Args:
562
+ source_folder (str): input folder
563
+ images_folder (str): output folder with links to images
564
+ labels_folder (str): output folder with links to labels
565
+ class_list_file (str, optional): list to classes.txt file
566
+ class_list_output_name (str, optional): output file to write with class information
567
+ force_lowercase_image_extension (bool, False): create symlinks with, e.g., .jpg, even
568
+ if the input image is, e.g., .JPG
569
+
551
570
  :meta private:
552
- """
553
-
571
+ """
572
+
554
573
  assert source_folder != images_folder and source_folder != labels_folder
555
-
574
+
556
575
  os.makedirs(images_folder,exist_ok=True)
557
576
  os.makedirs(labels_folder,exist_ok=True)
558
-
577
+
559
578
  image_files_relative = find_images(source_folder,recursive=True,return_relative_paths=True)
560
-
561
- # image_fn_relative = image_files_relative[0]=
579
+
580
+ # image_fn_relative = image_files_relative[0]=
562
581
  for image_fn_relative in tqdm(image_files_relative):
563
-
582
+
564
583
  source_file_abs = os.path.join(source_folder,image_fn_relative)
565
584
  target_file_abs = os.path.join(images_folder,image_fn_relative)
566
-
585
+
567
586
  if force_lowercase_image_extension:
568
587
  tokens = os.path.splitext(target_file_abs)
569
588
  target_file_abs = tokens[0] + tokens[1].lower()
570
-
589
+
571
590
  os.makedirs(os.path.dirname(target_file_abs),exist_ok=True)
572
591
  safe_create_link(source_file_abs,target_file_abs)
573
592
  source_annotation_file_abs = os.path.splitext(source_file_abs)[0] + '.txt'
@@ -577,7 +596,7 @@ def create_yolo_symlinks(source_folder,images_folder,labels_folder,
577
596
  os.makedirs(os.path.dirname(target_annotation_file_abs),exist_ok=True)
578
597
  safe_create_link(source_annotation_file_abs,target_annotation_file_abs)
579
598
 
580
- # ...for each image
599
+ # ...for each image
581
600
 
582
601
  if class_list_file is not None:
583
602
  target_class_list_file = os.path.join(labels_folder,class_list_output_name)
@@ -589,14 +608,14 @@ def create_yolo_symlinks(source_folder,images_folder,labels_folder,
589
608
  #%% Interactive driver
590
609
 
591
610
  if False:
592
-
611
+
593
612
  pass
594
613
 
595
614
  #%% Options
596
-
615
+
597
616
  input_file = os.path.expanduser('~/data/md-test-coco.json')
598
- image_folder = os.path.expanduser('~/data/md-test')
599
- output_folder = os.path.expanduser('~/data/md-test-yolo')
617
+ image_folder = os.path.expanduser('~/data/md-test')
618
+ output_folder = os.path.expanduser('~/data/md-test-yolo')
600
619
  create_image_and_label_folders=False
601
620
  class_file_name='classes.txt'
602
621
  allow_empty_annotations=False
@@ -605,18 +624,18 @@ if False:
605
624
  images_to_exclude=None
606
625
  path_replacement_char='#'
607
626
  category_names_to_exclude=None
608
-
609
-
627
+
628
+
610
629
  #%% Programmatic execution
611
-
630
+
612
631
  coco_to_yolo_results = coco_to_yolo(image_folder,output_folder,input_file,
613
632
  source_format='coco',
614
633
  overwrite_images=False,
615
634
  create_image_and_label_folders=create_image_and_label_folders,
616
635
  class_file_name=class_file_name,
617
636
  allow_empty_annotations=allow_empty_annotations,
618
- clip_boxes=clip_boxes)
619
-
637
+ clip_boxes=clip_boxes)
638
+
620
639
  create_yolo_symlinks(source_folder=output_folder,
621
640
  images_folder=output_folder + '/images',
622
641
  labels_folder=output_folder + '/labels',
@@ -630,40 +649,38 @@ if False:
630
649
  s = 'python coco_to_yolo.py {} {} {} --create_bounding_box_editor_symlinks'.format(
631
650
  image_folder,output_folder,input_file)
632
651
  print(s)
633
- import clipboard; clipboard.copy(s)
652
+ import clipboard; clipboard.copy(s)
634
653
 
635
654
 
636
655
  #%% Command-line driver
637
656
 
638
- import sys,argparse
639
-
640
- def main():
657
+ def main(): # noqa
641
658
 
642
659
  parser = argparse.ArgumentParser(
643
660
  description='Convert COCO-formatted data to YOLO format, flattening the image structure')
644
-
661
+
645
662
  # input_image_folder,output_folder,input_file
646
-
663
+
647
664
  parser.add_argument(
648
665
  'input_folder',
649
666
  type=str,
650
667
  help='Path to input images')
651
-
668
+
652
669
  parser.add_argument(
653
670
  'output_folder',
654
671
  type=str,
655
672
  help='Path to flat, YOLO-formatted dataset')
656
-
673
+
657
674
  parser.add_argument(
658
675
  'input_file',
659
676
  type=str,
660
677
  help='Path to COCO dataset file (.json)')
661
-
678
+
662
679
  parser.add_argument(
663
680
  '--create_bounding_box_editor_symlinks',
664
681
  action='store_true',
665
- help='Prepare symlinks so the whole folder appears to contain "images" and "labels" folderss')
666
-
682
+ help='Prepare symlinks so the whole folder appears to contain "images" and "labels" folderss')
683
+
667
684
  if len(sys.argv[1:]) == 0:
668
685
  parser.print_help()
669
686
  parser.exit()
@@ -671,7 +688,7 @@ def main():
671
688
  args = parser.parse_args()
672
689
 
673
690
  coco_to_yolo_results = coco_to_yolo(args.input_folder,args.output_folder,args.input_file)
674
-
691
+
675
692
  if args.create_bounding_box_editor_symlinks:
676
693
  create_yolo_symlinks(source_folder=args.output_folder,
677
694
  images_folder=args.output_folder + '/images',
@@ -679,6 +696,6 @@ def main():
679
696
  class_list_file=coco_to_yolo_results['class_list_filename'],
680
697
  class_list_output_name='object.data',
681
698
  force_lowercase_image_extension=True)
682
-
699
+
683
700
  if __name__ == '__main__':
684
701
  main()