megadetector 5.0.27__py3-none-any.whl → 5.0.29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (176)
  1. megadetector/api/batch_processing/api_core/batch_service/score.py +4 -5
  2. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +1 -1
  3. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +1 -1
  4. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
  5. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
  6. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
  7. megadetector/api/synchronous/api_core/tests/load_test.py +2 -3
  8. megadetector/classification/aggregate_classifier_probs.py +3 -3
  9. megadetector/classification/analyze_failed_images.py +5 -5
  10. megadetector/classification/cache_batchapi_outputs.py +5 -5
  11. megadetector/classification/create_classification_dataset.py +11 -12
  12. megadetector/classification/crop_detections.py +10 -10
  13. megadetector/classification/csv_to_json.py +8 -8
  14. megadetector/classification/detect_and_crop.py +13 -15
  15. megadetector/classification/evaluate_model.py +7 -7
  16. megadetector/classification/identify_mislabeled_candidates.py +6 -6
  17. megadetector/classification/json_to_azcopy_list.py +1 -1
  18. megadetector/classification/json_validator.py +29 -32
  19. megadetector/classification/map_classification_categories.py +9 -9
  20. megadetector/classification/merge_classification_detection_output.py +12 -9
  21. megadetector/classification/prepare_classification_script.py +19 -19
  22. megadetector/classification/prepare_classification_script_mc.py +23 -23
  23. megadetector/classification/run_classifier.py +4 -4
  24. megadetector/classification/save_mislabeled.py +6 -6
  25. megadetector/classification/train_classifier.py +1 -1
  26. megadetector/classification/train_classifier_tf.py +9 -9
  27. megadetector/classification/train_utils.py +10 -10
  28. megadetector/data_management/annotations/annotation_constants.py +1 -1
  29. megadetector/data_management/camtrap_dp_to_coco.py +45 -45
  30. megadetector/data_management/cct_json_utils.py +101 -101
  31. megadetector/data_management/cct_to_md.py +49 -49
  32. megadetector/data_management/cct_to_wi.py +33 -33
  33. megadetector/data_management/coco_to_labelme.py +75 -75
  34. megadetector/data_management/coco_to_yolo.py +189 -189
  35. megadetector/data_management/databases/add_width_and_height_to_db.py +3 -2
  36. megadetector/data_management/databases/combine_coco_camera_traps_files.py +38 -38
  37. megadetector/data_management/databases/integrity_check_json_db.py +202 -188
  38. megadetector/data_management/databases/subset_json_db.py +33 -33
  39. megadetector/data_management/generate_crops_from_cct.py +38 -38
  40. megadetector/data_management/get_image_sizes.py +54 -49
  41. megadetector/data_management/labelme_to_coco.py +130 -124
  42. megadetector/data_management/labelme_to_yolo.py +78 -72
  43. megadetector/data_management/lila/create_lila_blank_set.py +81 -83
  44. megadetector/data_management/lila/create_lila_test_set.py +32 -31
  45. megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
  46. megadetector/data_management/lila/download_lila_subset.py +21 -24
  47. megadetector/data_management/lila/generate_lila_per_image_labels.py +91 -91
  48. megadetector/data_management/lila/get_lila_annotation_counts.py +30 -30
  49. megadetector/data_management/lila/get_lila_image_counts.py +22 -22
  50. megadetector/data_management/lila/lila_common.py +70 -70
  51. megadetector/data_management/lila/test_lila_metadata_urls.py +13 -14
  52. megadetector/data_management/mewc_to_md.py +339 -340
  53. megadetector/data_management/ocr_tools.py +258 -252
  54. megadetector/data_management/read_exif.py +232 -223
  55. megadetector/data_management/remap_coco_categories.py +26 -26
  56. megadetector/data_management/remove_exif.py +31 -20
  57. megadetector/data_management/rename_images.py +187 -187
  58. megadetector/data_management/resize_coco_dataset.py +41 -41
  59. megadetector/data_management/speciesnet_to_md.py +41 -41
  60. megadetector/data_management/wi_download_csv_to_coco.py +55 -55
  61. megadetector/data_management/yolo_output_to_md_output.py +117 -120
  62. megadetector/data_management/yolo_to_coco.py +195 -188
  63. megadetector/detection/change_detection.py +831 -0
  64. megadetector/detection/process_video.py +341 -338
  65. megadetector/detection/pytorch_detector.py +308 -266
  66. megadetector/detection/run_detector.py +186 -166
  67. megadetector/detection/run_detector_batch.py +366 -364
  68. megadetector/detection/run_inference_with_yolov5_val.py +328 -325
  69. megadetector/detection/run_tiled_inference.py +312 -253
  70. megadetector/detection/tf_detector.py +24 -24
  71. megadetector/detection/video_utils.py +291 -283
  72. megadetector/postprocessing/add_max_conf.py +15 -11
  73. megadetector/postprocessing/categorize_detections_by_size.py +44 -44
  74. megadetector/postprocessing/classification_postprocessing.py +808 -311
  75. megadetector/postprocessing/combine_batch_outputs.py +20 -21
  76. megadetector/postprocessing/compare_batch_results.py +528 -517
  77. megadetector/postprocessing/convert_output_format.py +97 -97
  78. megadetector/postprocessing/create_crop_folder.py +220 -147
  79. megadetector/postprocessing/detector_calibration.py +173 -168
  80. megadetector/postprocessing/generate_csv_report.py +508 -0
  81. megadetector/postprocessing/load_api_results.py +25 -22
  82. megadetector/postprocessing/md_to_coco.py +129 -98
  83. megadetector/postprocessing/md_to_labelme.py +89 -83
  84. megadetector/postprocessing/md_to_wi.py +40 -40
  85. megadetector/postprocessing/merge_detections.py +87 -114
  86. megadetector/postprocessing/postprocess_batch_results.py +319 -302
  87. megadetector/postprocessing/remap_detection_categories.py +36 -36
  88. megadetector/postprocessing/render_detection_confusion_matrix.py +205 -199
  89. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
  90. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
  91. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +702 -677
  92. megadetector/postprocessing/separate_detections_into_folders.py +226 -211
  93. megadetector/postprocessing/subset_json_detector_output.py +265 -262
  94. megadetector/postprocessing/top_folders_to_bottom.py +45 -45
  95. megadetector/postprocessing/validate_batch_results.py +70 -70
  96. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
  97. megadetector/taxonomy_mapping/map_new_lila_datasets.py +15 -15
  98. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +14 -14
  99. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +66 -69
  100. megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
  101. megadetector/taxonomy_mapping/simple_image_download.py +8 -8
  102. megadetector/taxonomy_mapping/species_lookup.py +33 -33
  103. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
  104. megadetector/taxonomy_mapping/taxonomy_graph.py +11 -11
  105. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
  106. megadetector/utils/azure_utils.py +22 -22
  107. megadetector/utils/ct_utils.py +1019 -200
  108. megadetector/utils/directory_listing.py +21 -77
  109. megadetector/utils/gpu_test.py +22 -22
  110. megadetector/utils/md_tests.py +541 -518
  111. megadetector/utils/path_utils.py +1511 -406
  112. megadetector/utils/process_utils.py +41 -41
  113. megadetector/utils/sas_blob_utils.py +53 -49
  114. megadetector/utils/split_locations_into_train_val.py +73 -60
  115. megadetector/utils/string_utils.py +147 -26
  116. megadetector/utils/url_utils.py +463 -173
  117. megadetector/utils/wi_utils.py +2629 -2868
  118. megadetector/utils/write_html_image_list.py +137 -137
  119. megadetector/visualization/plot_utils.py +21 -21
  120. megadetector/visualization/render_images_with_thumbnails.py +37 -73
  121. megadetector/visualization/visualization_utils.py +424 -404
  122. megadetector/visualization/visualize_db.py +197 -190
  123. megadetector/visualization/visualize_detector_output.py +126 -98
  124. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/METADATA +6 -3
  125. megadetector-5.0.29.dist-info/RECORD +163 -0
  126. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/WHEEL +1 -1
  127. megadetector/data_management/importers/add_nacti_sizes.py +0 -52
  128. megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
  129. megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
  130. megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
  131. megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
  132. megadetector/data_management/importers/awc_to_json.py +0 -191
  133. megadetector/data_management/importers/bellevue_to_json.py +0 -272
  134. megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
  135. megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
  136. megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
  137. megadetector/data_management/importers/cct_field_adjustments.py +0 -58
  138. megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
  139. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  140. megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
  141. megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
  142. megadetector/data_management/importers/ena24_to_json.py +0 -276
  143. megadetector/data_management/importers/filenames_to_json.py +0 -386
  144. megadetector/data_management/importers/helena_to_cct.py +0 -283
  145. megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
  146. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  147. megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
  148. megadetector/data_management/importers/jb_csv_to_json.py +0 -150
  149. megadetector/data_management/importers/mcgill_to_json.py +0 -250
  150. megadetector/data_management/importers/missouri_to_json.py +0 -490
  151. megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
  152. megadetector/data_management/importers/noaa_seals_2019.py +0 -181
  153. megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
  154. megadetector/data_management/importers/pc_to_json.py +0 -365
  155. megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
  156. megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
  157. megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
  158. megadetector/data_management/importers/rspb_to_json.py +0 -356
  159. megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
  160. megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
  161. megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
  162. megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
  163. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  164. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  165. megadetector/data_management/importers/sulross_get_exif.py +0 -65
  166. megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
  167. megadetector/data_management/importers/ubc_to_json.py +0 -399
  168. megadetector/data_management/importers/umn_to_json.py +0 -507
  169. megadetector/data_management/importers/wellington_to_json.py +0 -263
  170. megadetector/data_management/importers/wi_to_json.py +0 -442
  171. megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
  172. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
  173. megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
  174. megadetector-5.0.27.dist-info/RECORD +0 -208
  175. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/licenses/LICENSE +0 -0
  176. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/top_level.txt +0 -0
@@ -8,7 +8,7 @@ the dataset (to a single folder) in the process.
8
8
  If the input and output folders are the same, writes .txt files to the input folder,
9
9
  and neither moves nor modifies images.
10
10
 
11
- Currently ignores segmentation masks, and errors if an annotation has a
11
+ Currently ignores segmentation masks, and errors if an annotation has a
12
12
  segmentation polygon but no bbox.
13
13
 
14
14
  Has only been tested on a handful of COCO Camera Traps data sets; if you
@@ -21,6 +21,8 @@ use it for more general COCO conversion, YMMV.
21
21
  import json
22
22
  import os
23
23
  import shutil
24
+ import sys
25
+ import argparse
24
26
 
25
27
  from collections import defaultdict
26
28
  from tqdm import tqdm
@@ -38,34 +40,34 @@ def write_yolo_dataset_file(yolo_dataset_file,
38
40
  test_folder_relative=None):
39
41
  """
40
42
  Write a YOLOv5 dataset.yaml file to the absolute path [yolo_dataset_file] (should
41
- have a .yaml extension, though it's only a warning if it doesn't).
42
-
43
+ have a .yaml extension, though it's only a warning if it doesn't).
44
+
43
45
  Args:
44
- yolo_dataset_file (str): the file, typically ending in .yaml or .yml, to write.
46
+ yolo_dataset_file (str): the file, typically ending in .yaml or .yml, to write.
45
47
  Does not have to be within dataset_base_dir.
46
48
  dataset_base_dir (str): the absolute base path of the YOLO dataset
47
- class_list (list or str): an ordered list of class names (the first item will be class 0,
48
- etc.), or the name of a text file containing an ordered list of class names (one per
49
+ class_list (list or str): an ordered list of class names (the first item will be class 0,
50
+ etc.), or the name of a text file containing an ordered list of class names (one per
49
51
  line, starting from class zero).
50
52
  train_folder_relative (str, optional): train folder name, used only to populate dataset.yaml
51
53
  val_folder_relative (str, optional): val folder name, used only to populate dataset.yaml
52
- test_folder_relative (str, optional): test folder name, used only to populate dataset.yaml
54
+ test_folder_relative (str, optional): test folder name, used only to populate dataset.yaml
53
55
  """
54
-
56
+
55
57
  # Read class names
56
58
  if isinstance(class_list,str):
57
59
  with open(class_list,'r') as f:
58
60
  class_lines = f.readlines()
59
- class_lines = [s.strip() for s in class_lines]
61
+ class_lines = [s.strip() for s in class_lines]
60
62
  class_list = [s for s in class_lines if len(s) > 0]
61
63
 
62
64
  if not (yolo_dataset_file.endswith('.yml') or yolo_dataset_file.endswith('.yaml')):
63
65
  print('Warning: writing dataset file to a non-yml/yaml extension:\n{}'.format(
64
66
  yolo_dataset_file))
65
-
67
+
66
68
  # Write dataset.yaml
67
69
  with open(yolo_dataset_file,'w') as f:
68
-
70
+
69
71
  f.write('# Train/val sets\n')
70
72
  f.write('path: {}\n'.format(dataset_base_dir))
71
73
  if train_folder_relative is not None:
@@ -74,9 +76,9 @@ def write_yolo_dataset_file(yolo_dataset_file,
74
76
  f.write('val: {}\n'.format(val_folder_relative))
75
77
  if test_folder_relative is not None:
76
78
  f.write('val: {}\n'.format(test_folder_relative))
77
-
79
+
78
80
  f.write('\n')
79
-
81
+
80
82
  f.write('# Classes\n')
81
83
  f.write('names:\n')
82
84
  for i_class,class_name in enumerate(class_list):
@@ -84,7 +86,7 @@ def write_yolo_dataset_file(yolo_dataset_file,
84
86
 
85
87
  # ...def write_yolo_dataset_file(...)
86
88
 
87
-
89
+
88
90
  def coco_to_yolo(input_image_folder,
89
91
  output_folder,
90
92
  input_file,
@@ -102,15 +104,15 @@ def coco_to_yolo(input_image_folder,
102
104
  write_output=True,
103
105
  flatten_paths=False):
104
106
  """
105
- Converts a COCO-formatted dataset to a YOLO-formatted dataset, optionally flattening the
107
+ Converts a COCO-formatted dataset to a YOLO-formatted dataset, optionally flattening the
106
108
  dataset to a single folder in the process.
107
-
109
+
108
110
  If the input and output folders are the same, writes .txt files to the input folder,
109
111
  and neither moves nor modifies images.
110
-
111
- Currently ignores segmentation masks, and errors if an annotation has a
112
+
113
+ Currently ignores segmentation masks, and errors if an annotation has a
112
114
  segmentation polygon but no bbox.
113
-
115
+
114
116
  Args:
115
117
  input_image_folder (str): the folder where images live; filenames in the COCO .json
116
118
  file [input_file] should be relative to this folder
@@ -119,88 +121,88 @@ def coco_to_yolo(input_image_folder,
119
121
  images are left alone.
120
122
  source_format (str, optional): can be 'coco' (default) or 'coco_camera_traps'. The only difference
121
123
  is that when source_format is 'coco_camera_traps', we treat an image with a non-bbox
122
- annotation as a special case, i.e. that's how an empty image is indicated. The original
123
- COCO standard is a little ambiguous on this issue. If source_format is 'coco', we
124
+ annotation as a special case, i.e. that's how an empty image is indicated. The original
125
+ COCO standard is a little ambiguous on this issue. If source_format is 'coco', we
124
126
  either treat images as empty or error, depending on the value of [allow_empty_annotations].
125
127
  [allow_empty_annotations] has no effect if source_format is 'coco_camera_traps'.
126
128
  overwrite_images (bool, optional): over-write images in the output folder if they exist
127
129
  create_image_and_label_folder (bool, optional): whether to create separate folders called 'images' and
128
- 'labels' in the YOLO output folder. If create_image_and_label_folders is False,
129
- a/b/c/image001.jpg will become a#b#c#image001.jpg, and the corresponding text file will
130
- be a#b#c#image001.txt. If create_image_and_label_folders is True, a/b/c/image001.jpg will become
131
- images/a#b#c#image001.jpg, and the corresponding text file will be
132
- labels/a#b#c#image001.txt.
133
- class_file_name (str, optional): .txt file (relative to the output folder) that we should
130
+ 'labels' in the YOLO output folder. If create_image_and_label_folders is False,
131
+ a/b/c/image001.jpg will become a#b#c#image001.jpg, and the corresponding text file will
132
+ be a#b#c#image001.txt. If create_image_and_label_folders is True, a/b/c/image001.jpg will become
133
+ images/a#b#c#image001.jpg, and the corresponding text file will be
134
+ labels/a#b#c#image001.txt.
135
+ class_file_name (str, optional): .txt file (relative to the output folder) that we should
134
136
  populate with a list of classes (or None to omit)
135
- allow_empty_annotations (bool, optional): if this is False and [source_format] is 'coco',
137
+ allow_empty_annotations (bool, optional): if this is False and [source_format] is 'coco',
136
138
  we'll error on annotations that have no 'bbox' field
137
139
  clip_boxes (bool, optional): whether to clip bounding box coordinates to the range [0,1] before
138
140
  converting to YOLO xywh format
139
141
  image_id_to_output_image_json_file (str, optional): an optional *output* file, to which we will write
140
142
  a mapping from image IDs to output file names
141
- images_to_exclude (list, optional): a list of image files (relative paths in the input folder) that we
143
+ images_to_exclude (list, optional): a list of image files (relative paths in the input folder) that we
142
144
  should ignore
143
145
  path_replacement_char (str, optional): only relevant if [flatten_paths] is True; this is used to replace
144
146
  path separators, e.g. if [path_replacement_char] is '#' and [flatten_paths] is True, a/b/c/d.jpg
145
147
  becomes a#b#c#d.jpg
146
148
  category_names_to_exclude (str, optional): category names that should not be represented in the
147
149
  YOLO output; only impacts annotations, does not prevent copying images. There's almost no reason
148
- you would want to specify this and [category_names_to_include].
149
- category_names_to_include (str, optional): allow-list of category names that should be represented
150
- in the YOLO output; only impacts annotations, does not prevent copying images. There's almost
151
- no reason you would want to specify this and [category_names_to_exclude].
150
+ you would want to specify this and [category_names_to_include].
151
+ category_names_to_include (str, optional): allow-list of category names that should be represented
152
+ in the YOLO output; only impacts annotations, does not prevent copying images. There's almost
153
+ no reason you would want to specify this and [category_names_to_exclude].
152
154
  write_output (bool, optional): determines whether we actually copy images and write annotations;
153
155
  setting this to False mostly puts this function in "dry run" mode. The class list
154
156
  file is written regardless of the value of write_output.
155
157
  flatten_paths (bool, optional): replace /'s in image filenames with [path_replacement_char],
156
158
  which ensures that the output folder is a single flat folder.
157
-
159
+
158
160
  Returns:
159
161
  dict: information about the coco --> yolo mapping, containing at least the fields:
160
-
161
- - class_list_filename: the filename to which we wrote the flat list of class names required
162
+
163
+ - class_list_filename: the filename to which we wrote the flat list of class names required
162
164
  by the YOLO format.
163
165
  - source_image_to_dest_image: a dict mapping source images to destination images
164
- - coco_id_to_yolo_id: a dict mapping COCO category IDs to YOLO category IDs
166
+ - coco_id_to_yolo_id: a dict mapping COCO category IDs to YOLO category IDs
165
167
  """
166
-
168
+
167
169
  ## Validate input
168
-
170
+
169
171
  if category_names_to_include is not None and category_names_to_exclude is not None:
170
172
  raise ValueError('category_names_to_include and category_names_to_exclude are mutually exclusive')
171
-
173
+
172
174
  if output_folder is None:
173
175
  output_folder = input_image_folder
174
-
176
+
175
177
  if images_to_exclude is not None:
176
178
  images_to_exclude = set(images_to_exclude)
177
-
179
+
178
180
  if category_names_to_exclude is None:
179
181
  category_names_to_exclude = {}
180
-
182
+
181
183
  assert os.path.isdir(input_image_folder)
182
184
  assert os.path.isfile(input_file)
183
185
  os.makedirs(output_folder,exist_ok=True)
184
-
186
+
185
187
  if (output_folder == input_image_folder) and (overwrite_images) and \
186
188
  (not create_image_and_label_folders) and (not flatten_paths):
187
189
  print('Warning: output folder and input folder are the same, disabling overwrite_images')
188
190
  overwrite_images = False
189
-
191
+
190
192
  ## Read input data
191
-
193
+
192
194
  with open(input_file,'r') as f:
193
195
  data = json.load(f)
194
-
195
-
196
+
197
+
196
198
  ## Parse annotations
197
-
199
+
198
200
  image_id_to_annotations = defaultdict(list)
199
-
201
+
200
202
  # i_ann = 0; ann = data['annotations'][0]
201
203
  for i_ann,ann in enumerate(data['annotations']):
202
-
203
- # Make sure no annotations have *only* segmentation data
204
+
205
+ # Make sure no annotations have *only* segmentation data
204
206
  if ( \
205
207
  ('segmentation' in ann.keys()) and \
206
208
  (ann['segmentation'] is not None) and \
@@ -209,21 +211,21 @@ def coco_to_yolo(input_image_folder,
209
211
  (('bbox' not in ann.keys()) or (ann['bbox'] is None) or (len(ann['bbox'])==0)):
210
212
  raise ValueError('Oops: segmentation data present without bbox information, ' + \
211
213
  'this script isn\'t ready for this dataset')
212
-
214
+
213
215
  image_id_to_annotations[ann['image_id']].append(ann)
214
-
216
+
215
217
  print('Parsed annotations for {} images'.format(len(image_id_to_annotations)))
216
-
218
+
217
219
  # Re-map class IDs to make sure they run from 0...n-classes-1
218
220
  #
219
221
  # Note: this allows unused categories in the output data set. This is OK for
220
222
  # some training pipelines, not for others.
221
223
  next_category_id = 0
222
- coco_id_to_yolo_id = {}
224
+ coco_id_to_yolo_id = {}
223
225
  coco_id_to_name = {}
224
226
  yolo_id_to_name = {}
225
227
  coco_category_ids_to_exclude = set()
226
-
228
+
227
229
  for category in data['categories']:
228
230
  coco_id_to_name[category['id']] = category['name']
229
231
  if (category_names_to_include is not None) and \
@@ -232,86 +234,86 @@ def coco_to_yolo(input_image_folder,
232
234
  continue
233
235
  elif (category['name'] in category_names_to_exclude):
234
236
  coco_category_ids_to_exclude.add(category['id'])
235
- continue
237
+ continue
236
238
  assert category['id'] not in coco_id_to_yolo_id
237
239
  coco_id_to_yolo_id[category['id']] = next_category_id
238
240
  yolo_id_to_name[next_category_id] = category['name']
239
241
  next_category_id += 1
240
-
241
-
242
+
243
+
242
244
  ## Process images (everything but I/O)
243
-
245
+
244
246
  # List of dictionaries with keys 'source_image','dest_image','bboxes','dest_txt'
245
247
  images_to_copy = []
246
-
248
+
247
249
  missing_images = []
248
250
  excluded_images = []
249
-
251
+
250
252
  image_names = set()
251
-
253
+
252
254
  typical_image_extensions = set(['.jpg','.jpeg','.png','.gif','.tif','.bmp'])
253
-
255
+
254
256
  printed_empty_annotation_warning = False
255
-
257
+
256
258
  image_id_to_output_image_name = {}
257
-
259
+
258
260
  print('Processing annotations')
259
-
261
+
260
262
  n_clipped_boxes = 0
261
263
  n_total_boxes = 0
262
-
264
+
263
265
  # i_image = 0; im = data['images'][i_image]
264
266
  for i_image,im in tqdm(enumerate(data['images']),total=len(data['images'])):
265
-
267
+
266
268
  output_info = {}
267
- source_image = os.path.join(input_image_folder,im['file_name'])
269
+ source_image = os.path.join(input_image_folder,im['file_name'])
268
270
  output_info['source_image'] = source_image
269
-
271
+
270
272
  if images_to_exclude is not None and im['file_name'] in images_to_exclude:
271
273
  excluded_images.append(im['file_name'])
272
274
  continue
273
-
275
+
274
276
  tokens = os.path.splitext(im['file_name'])
275
277
  if tokens[1].lower() not in typical_image_extensions:
276
278
  print('Warning: unusual image file name {}'.format(im['file_name']))
277
-
279
+
278
280
  if flatten_paths:
279
281
  image_name = tokens[0].replace('\\','/').replace('/',path_replacement_char) + \
280
- '_' + str(i_image).zfill(6)
282
+ '_' + str(i_image).zfill(6)
281
283
  else:
282
284
  image_name = tokens[0]
283
285
 
284
286
  assert image_name not in image_names, 'Image name collision for {}'.format(image_name)
285
287
  image_names.add(image_name)
286
-
288
+
287
289
  assert im['id'] not in image_id_to_output_image_name
288
290
  image_id_to_output_image_name[im['id']] = image_name
289
-
291
+
290
292
  dest_image_relative = image_name + tokens[1]
291
293
  output_info['dest_image_relative'] = dest_image_relative
292
294
  dest_txt_relative = image_name + '.txt'
293
295
  output_info['dest_txt_relative'] = dest_txt_relative
294
296
  output_info['bboxes'] = []
295
-
297
+
296
298
  # assert os.path.isfile(source_image), 'Could not find image {}'.format(source_image)
297
299
  if not os.path.isfile(source_image):
298
300
  print('Warning: could not find image {}'.format(source_image))
299
301
  missing_images.append(im['file_name'])
300
302
  continue
301
-
303
+
302
304
  image_id = im['id']
303
-
305
+
304
306
  image_bboxes = []
305
-
307
+
306
308
  if image_id in image_id_to_annotations:
307
-
309
+
308
310
  for ann in image_id_to_annotations[image_id]:
309
-
311
+
310
312
  # If this annotation has no bounding boxes...
311
313
  if 'bbox' not in ann or ann['bbox'] is None or len(ann['bbox']) == 0:
312
-
314
+
313
315
  if source_format == 'coco':
314
-
316
+
315
317
  if not allow_empty_annotations:
316
318
  # This is not entirely clear from the COCO spec, but it seems to be consensus
317
319
  # that if you want to specify an image with no objects, you don't include any
@@ -319,11 +321,11 @@ def coco_to_yolo(input_image_folder,
319
321
  raise ValueError('If an annotation exists, it should have content')
320
322
  else:
321
323
  continue
322
-
324
+
323
325
  elif source_format == 'coco_camera_traps':
324
-
325
- # We allow empty bbox lists in COCO camera traps files; this is typically a
326
- # negative example in a dataset that has bounding boxes, and 0 is typically
326
+
327
+ # We allow empty bbox lists in COCO camera traps files; this is typically a
328
+ # negative example in a dataset that has bounding boxes, and 0 is typically
327
329
  # the empty category.
328
330
  if ann['category_id'] != 0:
329
331
  if not printed_empty_annotation_warning:
@@ -331,113 +333,113 @@ def coco_to_yolo(input_image_folder,
331
333
  print('Warning: non-bbox annotation found with category {}'.format(
332
334
  ann['category_id']))
333
335
  continue
334
-
336
+
335
337
  else:
336
-
338
+
337
339
  raise ValueError('Unrecognized COCO variant: {}'.format(source_format))
338
-
340
+
339
341
  # ...if this is an empty annotation
340
-
342
+
341
343
  coco_bbox = ann['bbox']
342
-
344
+
343
345
  # This category isn't in our category list. This typically corresponds to whole sets
344
346
  # of images that were excluded from the YOLO set.
345
347
  if ann['category_id'] in coco_category_ids_to_exclude:
346
348
  continue
347
-
349
+
348
350
  yolo_category_id = coco_id_to_yolo_id[ann['category_id']]
349
-
351
+
350
352
  # COCO: [x_min, y_min, width, height] in absolute coordinates
351
353
  # YOLO: [class, x_center, y_center, width, height] in normalized coordinates
352
-
354
+
353
355
  # Convert from COCO coordinates to YOLO coordinates
354
356
  img_w = im['width']
355
357
  img_h = im['height']
356
-
358
+
357
359
  if source_format in ('coco','coco_camera_traps'):
358
-
360
+
359
361
  x_min_absolute = coco_bbox[0]
360
362
  y_min_absolute = coco_bbox[1]
361
363
  box_w_absolute = coco_bbox[2]
362
364
  box_h_absolute = coco_bbox[3]
363
-
365
+
364
366
  x_center_absolute = (x_min_absolute + (x_min_absolute + box_w_absolute)) / 2
365
367
  y_center_absolute = (y_min_absolute + (y_min_absolute + box_h_absolute)) / 2
366
-
368
+
367
369
  x_center_relative = x_center_absolute / img_w
368
370
  y_center_relative = y_center_absolute / img_h
369
-
371
+
370
372
  box_w_relative = box_w_absolute / img_w
371
373
  box_h_relative = box_h_absolute / img_h
372
-
374
+
373
375
  else:
374
-
376
+
375
377
  raise ValueError('Unrecognized source format {}'.format(source_format))
376
-
378
+
377
379
  if clip_boxes:
378
-
380
+
379
381
  clipped_box = False
380
-
381
- box_right = x_center_relative + (box_w_relative / 2.0)
382
+
383
+ box_right = x_center_relative + (box_w_relative / 2.0)
382
384
  if box_right > 1.0:
383
385
  clipped_box = True
384
386
  overhang = box_right - 1.0
385
387
  box_w_relative -= overhang
386
388
  x_center_relative -= (overhang / 2.0)
387
389
 
388
- box_bottom = y_center_relative + (box_h_relative / 2.0)
390
+ box_bottom = y_center_relative + (box_h_relative / 2.0)
389
391
  if box_bottom > 1.0:
390
392
  clipped_box = True
391
393
  overhang = box_bottom - 1.0
392
394
  box_h_relative -= overhang
393
395
  y_center_relative -= (overhang / 2.0)
394
-
396
+
395
397
  box_left = x_center_relative - (box_w_relative / 2.0)
396
398
  if box_left < 0.0:
397
399
  clipped_box = True
398
400
  overhang = abs(box_left)
399
401
  box_w_relative -= overhang
400
402
  x_center_relative += (overhang / 2.0)
401
-
403
+
402
404
  box_top = y_center_relative - (box_h_relative / 2.0)
403
405
  if box_top < 0.0:
404
406
  clipped_box = True
405
407
  overhang = abs(box_top)
406
408
  box_h_relative -= overhang
407
409
  y_center_relative += (overhang / 2.0)
408
-
410
+
409
411
  if clipped_box:
410
412
  n_clipped_boxes += 1
411
-
413
+
412
414
  yolo_box = [yolo_category_id,
413
- x_center_relative, y_center_relative,
415
+ x_center_relative, y_center_relative,
414
416
  box_w_relative, box_h_relative]
415
-
417
+
416
418
  image_bboxes.append(yolo_box)
417
419
  n_total_boxes += 1
418
-
419
- # ...for each annotation
420
-
420
+
421
+ # ...for each annotation
422
+
421
423
  # ...if this image has annotations
422
-
424
+
423
425
  output_info['bboxes'] = image_bboxes
424
-
425
- images_to_copy.append(output_info)
426
-
426
+
427
+ images_to_copy.append(output_info)
428
+
427
429
  # ...for each image
428
-
430
+
429
431
  print('\nWriting {} boxes ({} clipped) for {} images'.format(n_total_boxes,
430
432
  n_clipped_boxes,len(images_to_copy)))
431
433
  print('{} missing images (of {})'.format(len(missing_images),len(data['images'])))
432
-
434
+
433
435
  if images_to_exclude is not None:
434
436
  print('{} excluded images (of {})'.format(len(excluded_images),len(data['images'])))
435
-
436
-
437
+
438
+
437
439
  ## Write output
438
-
440
+
439
441
  print('Generating class list')
440
-
442
+
441
443
  if class_file_name is not None:
442
444
  class_list_filename = os.path.join(output_folder,class_file_name)
443
445
  with open(class_list_filename, 'w') as f:
@@ -446,12 +448,12 @@ def coco_to_yolo(input_image_folder,
446
448
  # Category IDs should range from 0..N-1
447
449
  assert i_class in yolo_id_to_name
448
450
  f.write(yolo_id_to_name[i_class] + '\n')
449
-
451
+
450
452
  if image_id_to_output_image_json_file is not None:
451
453
  print('Writing image ID mapping to {}'.format(image_id_to_output_image_json_file))
452
454
  with open(image_id_to_output_image_json_file,'w') as f:
453
455
  json.dump(image_id_to_output_image_name,f,indent=1)
454
-
456
+
455
457
 
456
458
  if (output_folder == input_image_folder) and (not create_image_and_label_folders):
457
459
  print('Creating annotation files (not copying images, input and output folder are the same)')
@@ -464,12 +466,12 @@ def coco_to_yolo(input_image_folder,
464
466
  else:
465
467
  dest_image_folder = output_folder
466
468
  dest_txt_folder = output_folder
467
-
469
+
468
470
  source_image_to_dest_image = {}
469
-
471
+
470
472
  label_files_written = []
471
473
  n_boxes_written = 0
472
-
474
+
473
475
  # TODO: parallelize this loop
474
476
  #
475
477
  # output_info = images_to_copy[0]
@@ -478,61 +480,61 @@ def coco_to_yolo(input_image_folder,
478
480
  source_image = output_info['source_image']
479
481
  dest_image_relative = output_info['dest_image_relative']
480
482
  dest_txt_relative = output_info['dest_txt_relative']
481
-
483
+
482
484
  dest_image = os.path.join(dest_image_folder,dest_image_relative)
483
485
  dest_txt = os.path.join(dest_txt_folder,dest_txt_relative)
484
-
486
+
485
487
  source_image_to_dest_image[source_image] = dest_image
486
-
488
+
487
489
  # Copy the image if necessary
488
490
  if write_output:
489
-
490
- os.makedirs(os.path.dirname(dest_image),exist_ok=True)
491
+
492
+ os.makedirs(os.path.dirname(dest_image),exist_ok=True)
491
493
  os.makedirs(os.path.dirname(dest_txt),exist_ok=True)
492
-
494
+
493
495
  if not create_image_and_label_folders:
494
496
  assert os.path.dirname(dest_image) == os.path.dirname(dest_txt)
495
-
497
+
496
498
  if (not os.path.isfile(dest_image)) or (overwrite_images):
497
499
  shutil.copyfile(source_image,dest_image)
498
-
499
- bboxes = output_info['bboxes']
500
-
500
+
501
+ bboxes = output_info['bboxes']
502
+
501
503
  # Write the annotation file if necessary
502
504
  #
503
- # Only write an annotation file if there are bounding boxes. Images with
505
+ # Only write an annotation file if there are bounding boxes. Images with
504
506
  # no .txt files are treated as hard negatives, at least by YOLOv5:
505
507
  #
506
508
  # https://github.com/ultralytics/yolov5/issues/3218
507
509
  #
508
- # I think this is also true for images with empty .txt files, but
509
- # I'm using the convention suggested on that issue, i.e. hard
510
+ # I think this is also true for images with empty .txt files, but
511
+ # I'm using the convention suggested on that issue, i.e. hard
510
512
  # negatives are expressed as images without .txt files.
511
513
  if len(bboxes) > 0:
512
-
514
+
513
515
  n_boxes_written += len(bboxes)
514
516
  label_files_written.append(dest_txt)
515
-
517
+
516
518
  if write_output:
517
-
519
+
518
520
  with open(dest_txt,'w') as f:
519
-
521
+
520
522
  # bbox = bboxes[0]
521
523
  for bbox in bboxes:
522
524
  assert len(bbox) == 5
523
525
  s = '{} {} {} {} {}'.format(bbox[0],bbox[1],bbox[2],bbox[3],bbox[4])
524
526
  f.write(s + '\n')
525
-
526
-
527
+
528
+
527
529
  # ...for each image
528
-
530
+
529
531
  coco_to_yolo_info = {}
530
532
  coco_to_yolo_info['class_list_filename'] = class_list_filename
531
533
  coco_to_yolo_info['source_image_to_dest_image'] = source_image_to_dest_image
532
534
  coco_to_yolo_info['coco_id_to_yolo_id'] = coco_id_to_yolo_id
533
535
  coco_to_yolo_info['label_files_written'] = label_files_written
534
536
  coco_to_yolo_info['n_boxes_written'] = n_boxes_written
535
-
537
+
536
538
  return coco_to_yolo_info
537
539
 
538
540
  # ...def coco_to_yolo(...)
@@ -544,30 +546,30 @@ def create_yolo_symlinks(source_folder,images_folder,labels_folder,
544
546
  force_lowercase_image_extension=False):
545
547
  """
546
548
  Given a YOLO-formatted folder of images and .txt files, creates a folder
547
- of symlinks to all the images, and a folder of symlinks to all the labels.
548
- Used to support preview/editing tools that assume images and labels are in separate
549
+ of symlinks to all the images, and a folder of symlinks to all the labels.
550
+ Used to support preview/editing tools that assume images and labels are in separate
549
551
  folders.
550
-
552
+
551
553
  :meta private:
552
- """
553
-
554
+ """
555
+
554
556
  assert source_folder != images_folder and source_folder != labels_folder
555
-
557
+
556
558
  os.makedirs(images_folder,exist_ok=True)
557
559
  os.makedirs(labels_folder,exist_ok=True)
558
-
560
+
559
561
  image_files_relative = find_images(source_folder,recursive=True,return_relative_paths=True)
560
-
561
- # image_fn_relative = image_files_relative[0]=
562
+
563
+ # image_fn_relative = image_files_relative[0]=
562
564
  for image_fn_relative in tqdm(image_files_relative):
563
-
565
+
564
566
  source_file_abs = os.path.join(source_folder,image_fn_relative)
565
567
  target_file_abs = os.path.join(images_folder,image_fn_relative)
566
-
568
+
567
569
  if force_lowercase_image_extension:
568
570
  tokens = os.path.splitext(target_file_abs)
569
571
  target_file_abs = tokens[0] + tokens[1].lower()
570
-
572
+
571
573
  os.makedirs(os.path.dirname(target_file_abs),exist_ok=True)
572
574
  safe_create_link(source_file_abs,target_file_abs)
573
575
  source_annotation_file_abs = os.path.splitext(source_file_abs)[0] + '.txt'
@@ -577,7 +579,7 @@ def create_yolo_symlinks(source_folder,images_folder,labels_folder,
577
579
  os.makedirs(os.path.dirname(target_annotation_file_abs),exist_ok=True)
578
580
  safe_create_link(source_annotation_file_abs,target_annotation_file_abs)
579
581
 
580
- # ...for each image
582
+ # ...for each image
581
583
 
582
584
  if class_list_file is not None:
583
585
  target_class_list_file = os.path.join(labels_folder,class_list_output_name)
@@ -589,14 +591,14 @@ def create_yolo_symlinks(source_folder,images_folder,labels_folder,
589
591
  #%% Interactive driver
590
592
 
591
593
  if False:
592
-
594
+
593
595
  pass
594
596
 
595
597
  #%% Options
596
-
598
+
597
599
  input_file = os.path.expanduser('~/data/md-test-coco.json')
598
- image_folder = os.path.expanduser('~/data/md-test')
599
- output_folder = os.path.expanduser('~/data/md-test-yolo')
600
+ image_folder = os.path.expanduser('~/data/md-test')
601
+ output_folder = os.path.expanduser('~/data/md-test-yolo')
600
602
  create_image_and_label_folders=False
601
603
  class_file_name='classes.txt'
602
604
  allow_empty_annotations=False
@@ -605,18 +607,18 @@ if False:
605
607
  images_to_exclude=None
606
608
  path_replacement_char='#'
607
609
  category_names_to_exclude=None
608
-
609
-
610
+
611
+
610
612
  #%% Programmatic execution
611
-
613
+
612
614
  coco_to_yolo_results = coco_to_yolo(image_folder,output_folder,input_file,
613
615
  source_format='coco',
614
616
  overwrite_images=False,
615
617
  create_image_and_label_folders=create_image_and_label_folders,
616
618
  class_file_name=class_file_name,
617
619
  allow_empty_annotations=allow_empty_annotations,
618
- clip_boxes=clip_boxes)
619
-
620
+ clip_boxes=clip_boxes)
621
+
620
622
  create_yolo_symlinks(source_folder=output_folder,
621
623
  images_folder=output_folder + '/images',
622
624
  labels_folder=output_folder + '/labels',
@@ -630,40 +632,38 @@ if False:
630
632
  s = 'python coco_to_yolo.py {} {} {} --create_bounding_box_editor_symlinks'.format(
631
633
  image_folder,output_folder,input_file)
632
634
  print(s)
633
- import clipboard; clipboard.copy(s)
635
+ import clipboard; clipboard.copy(s)
634
636
 
635
637
 
636
638
  #%% Command-line driver
637
639
 
638
- import sys,argparse
639
-
640
- def main():
640
+ def main(): # noqa
641
641
 
642
642
  parser = argparse.ArgumentParser(
643
643
  description='Convert COCO-formatted data to YOLO format, flattening the image structure')
644
-
644
+
645
645
  # input_image_folder,output_folder,input_file
646
-
646
+
647
647
  parser.add_argument(
648
648
  'input_folder',
649
649
  type=str,
650
650
  help='Path to input images')
651
-
651
+
652
652
  parser.add_argument(
653
653
  'output_folder',
654
654
  type=str,
655
655
  help='Path to flat, YOLO-formatted dataset')
656
-
656
+
657
657
  parser.add_argument(
658
658
  'input_file',
659
659
  type=str,
660
660
  help='Path to COCO dataset file (.json)')
661
-
661
+
662
662
  parser.add_argument(
663
663
  '--create_bounding_box_editor_symlinks',
664
664
  action='store_true',
665
- help='Prepare symlinks so the whole folder appears to contain "images" and "labels" folderss')
666
-
665
+ help='Prepare symlinks so the whole folder appears to contain "images" and "labels" folderss')
666
+
667
667
  if len(sys.argv[1:]) == 0:
668
668
  parser.print_help()
669
669
  parser.exit()
@@ -671,7 +671,7 @@ def main():
671
671
  args = parser.parse_args()
672
672
 
673
673
  coco_to_yolo_results = coco_to_yolo(args.input_folder,args.output_folder,args.input_file)
674
-
674
+
675
675
  if args.create_bounding_box_editor_symlinks:
676
676
  create_yolo_symlinks(source_folder=args.output_folder,
677
677
  images_folder=args.output_folder + '/images',
@@ -679,6 +679,6 @@ def main():
679
679
  class_list_file=coco_to_yolo_results['class_list_filename'],
680
680
  class_list_output_name='object.data',
681
681
  force_lowercase_image_extension=True)
682
-
682
+
683
683
  if __name__ == '__main__':
684
684
  main()