megadetector 5.0.27__py3-none-any.whl → 5.0.29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic.

Files changed (176)
  1. megadetector/api/batch_processing/api_core/batch_service/score.py +4 -5
  2. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +1 -1
  3. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +1 -1
  4. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
  5. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
  6. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
  7. megadetector/api/synchronous/api_core/tests/load_test.py +2 -3
  8. megadetector/classification/aggregate_classifier_probs.py +3 -3
  9. megadetector/classification/analyze_failed_images.py +5 -5
  10. megadetector/classification/cache_batchapi_outputs.py +5 -5
  11. megadetector/classification/create_classification_dataset.py +11 -12
  12. megadetector/classification/crop_detections.py +10 -10
  13. megadetector/classification/csv_to_json.py +8 -8
  14. megadetector/classification/detect_and_crop.py +13 -15
  15. megadetector/classification/evaluate_model.py +7 -7
  16. megadetector/classification/identify_mislabeled_candidates.py +6 -6
  17. megadetector/classification/json_to_azcopy_list.py +1 -1
  18. megadetector/classification/json_validator.py +29 -32
  19. megadetector/classification/map_classification_categories.py +9 -9
  20. megadetector/classification/merge_classification_detection_output.py +12 -9
  21. megadetector/classification/prepare_classification_script.py +19 -19
  22. megadetector/classification/prepare_classification_script_mc.py +23 -23
  23. megadetector/classification/run_classifier.py +4 -4
  24. megadetector/classification/save_mislabeled.py +6 -6
  25. megadetector/classification/train_classifier.py +1 -1
  26. megadetector/classification/train_classifier_tf.py +9 -9
  27. megadetector/classification/train_utils.py +10 -10
  28. megadetector/data_management/annotations/annotation_constants.py +1 -1
  29. megadetector/data_management/camtrap_dp_to_coco.py +45 -45
  30. megadetector/data_management/cct_json_utils.py +101 -101
  31. megadetector/data_management/cct_to_md.py +49 -49
  32. megadetector/data_management/cct_to_wi.py +33 -33
  33. megadetector/data_management/coco_to_labelme.py +75 -75
  34. megadetector/data_management/coco_to_yolo.py +189 -189
  35. megadetector/data_management/databases/add_width_and_height_to_db.py +3 -2
  36. megadetector/data_management/databases/combine_coco_camera_traps_files.py +38 -38
  37. megadetector/data_management/databases/integrity_check_json_db.py +202 -188
  38. megadetector/data_management/databases/subset_json_db.py +33 -33
  39. megadetector/data_management/generate_crops_from_cct.py +38 -38
  40. megadetector/data_management/get_image_sizes.py +54 -49
  41. megadetector/data_management/labelme_to_coco.py +130 -124
  42. megadetector/data_management/labelme_to_yolo.py +78 -72
  43. megadetector/data_management/lila/create_lila_blank_set.py +81 -83
  44. megadetector/data_management/lila/create_lila_test_set.py +32 -31
  45. megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
  46. megadetector/data_management/lila/download_lila_subset.py +21 -24
  47. megadetector/data_management/lila/generate_lila_per_image_labels.py +91 -91
  48. megadetector/data_management/lila/get_lila_annotation_counts.py +30 -30
  49. megadetector/data_management/lila/get_lila_image_counts.py +22 -22
  50. megadetector/data_management/lila/lila_common.py +70 -70
  51. megadetector/data_management/lila/test_lila_metadata_urls.py +13 -14
  52. megadetector/data_management/mewc_to_md.py +339 -340
  53. megadetector/data_management/ocr_tools.py +258 -252
  54. megadetector/data_management/read_exif.py +232 -223
  55. megadetector/data_management/remap_coco_categories.py +26 -26
  56. megadetector/data_management/remove_exif.py +31 -20
  57. megadetector/data_management/rename_images.py +187 -187
  58. megadetector/data_management/resize_coco_dataset.py +41 -41
  59. megadetector/data_management/speciesnet_to_md.py +41 -41
  60. megadetector/data_management/wi_download_csv_to_coco.py +55 -55
  61. megadetector/data_management/yolo_output_to_md_output.py +117 -120
  62. megadetector/data_management/yolo_to_coco.py +195 -188
  63. megadetector/detection/change_detection.py +831 -0
  64. megadetector/detection/process_video.py +341 -338
  65. megadetector/detection/pytorch_detector.py +308 -266
  66. megadetector/detection/run_detector.py +186 -166
  67. megadetector/detection/run_detector_batch.py +366 -364
  68. megadetector/detection/run_inference_with_yolov5_val.py +328 -325
  69. megadetector/detection/run_tiled_inference.py +312 -253
  70. megadetector/detection/tf_detector.py +24 -24
  71. megadetector/detection/video_utils.py +291 -283
  72. megadetector/postprocessing/add_max_conf.py +15 -11
  73. megadetector/postprocessing/categorize_detections_by_size.py +44 -44
  74. megadetector/postprocessing/classification_postprocessing.py +808 -311
  75. megadetector/postprocessing/combine_batch_outputs.py +20 -21
  76. megadetector/postprocessing/compare_batch_results.py +528 -517
  77. megadetector/postprocessing/convert_output_format.py +97 -97
  78. megadetector/postprocessing/create_crop_folder.py +220 -147
  79. megadetector/postprocessing/detector_calibration.py +173 -168
  80. megadetector/postprocessing/generate_csv_report.py +508 -0
  81. megadetector/postprocessing/load_api_results.py +25 -22
  82. megadetector/postprocessing/md_to_coco.py +129 -98
  83. megadetector/postprocessing/md_to_labelme.py +89 -83
  84. megadetector/postprocessing/md_to_wi.py +40 -40
  85. megadetector/postprocessing/merge_detections.py +87 -114
  86. megadetector/postprocessing/postprocess_batch_results.py +319 -302
  87. megadetector/postprocessing/remap_detection_categories.py +36 -36
  88. megadetector/postprocessing/render_detection_confusion_matrix.py +205 -199
  89. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
  90. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
  91. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +702 -677
  92. megadetector/postprocessing/separate_detections_into_folders.py +226 -211
  93. megadetector/postprocessing/subset_json_detector_output.py +265 -262
  94. megadetector/postprocessing/top_folders_to_bottom.py +45 -45
  95. megadetector/postprocessing/validate_batch_results.py +70 -70
  96. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
  97. megadetector/taxonomy_mapping/map_new_lila_datasets.py +15 -15
  98. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +14 -14
  99. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +66 -69
  100. megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
  101. megadetector/taxonomy_mapping/simple_image_download.py +8 -8
  102. megadetector/taxonomy_mapping/species_lookup.py +33 -33
  103. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
  104. megadetector/taxonomy_mapping/taxonomy_graph.py +11 -11
  105. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
  106. megadetector/utils/azure_utils.py +22 -22
  107. megadetector/utils/ct_utils.py +1019 -200
  108. megadetector/utils/directory_listing.py +21 -77
  109. megadetector/utils/gpu_test.py +22 -22
  110. megadetector/utils/md_tests.py +541 -518
  111. megadetector/utils/path_utils.py +1511 -406
  112. megadetector/utils/process_utils.py +41 -41
  113. megadetector/utils/sas_blob_utils.py +53 -49
  114. megadetector/utils/split_locations_into_train_val.py +73 -60
  115. megadetector/utils/string_utils.py +147 -26
  116. megadetector/utils/url_utils.py +463 -173
  117. megadetector/utils/wi_utils.py +2629 -2868
  118. megadetector/utils/write_html_image_list.py +137 -137
  119. megadetector/visualization/plot_utils.py +21 -21
  120. megadetector/visualization/render_images_with_thumbnails.py +37 -73
  121. megadetector/visualization/visualization_utils.py +424 -404
  122. megadetector/visualization/visualize_db.py +197 -190
  123. megadetector/visualization/visualize_detector_output.py +126 -98
  124. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/METADATA +6 -3
  125. megadetector-5.0.29.dist-info/RECORD +163 -0
  126. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/WHEEL +1 -1
  127. megadetector/data_management/importers/add_nacti_sizes.py +0 -52
  128. megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
  129. megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
  130. megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
  131. megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
  132. megadetector/data_management/importers/awc_to_json.py +0 -191
  133. megadetector/data_management/importers/bellevue_to_json.py +0 -272
  134. megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
  135. megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
  136. megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
  137. megadetector/data_management/importers/cct_field_adjustments.py +0 -58
  138. megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
  139. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  140. megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
  141. megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
  142. megadetector/data_management/importers/ena24_to_json.py +0 -276
  143. megadetector/data_management/importers/filenames_to_json.py +0 -386
  144. megadetector/data_management/importers/helena_to_cct.py +0 -283
  145. megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
  146. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  147. megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
  148. megadetector/data_management/importers/jb_csv_to_json.py +0 -150
  149. megadetector/data_management/importers/mcgill_to_json.py +0 -250
  150. megadetector/data_management/importers/missouri_to_json.py +0 -490
  151. megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
  152. megadetector/data_management/importers/noaa_seals_2019.py +0 -181
  153. megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
  154. megadetector/data_management/importers/pc_to_json.py +0 -365
  155. megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
  156. megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
  157. megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
  158. megadetector/data_management/importers/rspb_to_json.py +0 -356
  159. megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
  160. megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
  161. megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
  162. megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
  163. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  164. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  165. megadetector/data_management/importers/sulross_get_exif.py +0 -65
  166. megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
  167. megadetector/data_management/importers/ubc_to_json.py +0 -399
  168. megadetector/data_management/importers/umn_to_json.py +0 -507
  169. megadetector/data_management/importers/wellington_to_json.py +0 -263
  170. megadetector/data_management/importers/wi_to_json.py +0 -442
  171. megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
  172. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
  173. megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
  174. megadetector-5.0.27.dist-info/RECORD +0 -208
  175. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/licenses/LICENSE +0 -0
  176. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/top_level.txt +0 -0
@@ -2,7 +2,7 @@
 
  yolo_to_coco.py
 
- Converts a folder of YOLO-formatted annotation files to a COCO-formatted dataset.
+ Converts a folder of YOLO-formatted annotation files to a COCO-formatted dataset.
 
  """
 
@@ -31,6 +31,7 @@ def _filename_to_image_id(fn):
  """
  Image IDs can't have spaces in them, replace spaces with underscores
  """
+
  return fn.replace(' ','_').replace('\\','/')
 
 
@@ -38,27 +39,27 @@ def _process_image(fn_abs,input_folder,category_id_to_name,label_folder):
  """
  Internal support function for processing one image's labels.
  """
-
+
  # Create the image object for this image
  #
  # Always use forward slashes in image filenames and IDs
  image_fn_relative = os.path.relpath(fn_abs,input_folder).replace('\\','/')
  image_id = _filename_to_image_id(image_fn_relative)
-
+
  # This is done in a separate loop now
  #
  # assert image_id not in image_ids, \
  # 'Oops, you have hit a very esoteric case where you have the same filename ' + \
  # 'with both spaces and underscores, this is not currently handled.'
  # image_ids.add(image_id)
-
+
  im = {}
  im['file_name'] = image_fn_relative
  im['id'] = image_id
-
+
  annotations_this_image = []
-
- try:
+
+ try:
  pil_im = open_image(fn_abs)
  im_width, im_height = pil_im.size
  im['width'] = im_width
@@ -70,32 +71,32 @@ def _process_image(fn_abs,input_folder,category_id_to_name,label_folder):
  im['height'] = -1
  im['error'] = str(e)
  return (im,annotations_this_image)
-
+
  # Is there an annotation file for this image?
  if label_folder is not None:
  assert input_folder in fn_abs
  label_file_abs_base = fn_abs.replace(input_folder,label_folder)
  else:
  label_file_abs_base = fn_abs
-
+
  annotation_file = os.path.splitext(label_file_abs_base)[0] + '.txt'
  if not os.path.isfile(annotation_file):
  annotation_file = os.path.splitext(fn_abs)[0] + '.TXT'
-
+
  if os.path.isfile(annotation_file):
-
+
  with open(annotation_file,'r') as f:
  lines = f.readlines()
  lines = [s.strip() for s in lines]
-
+
  # s = lines[0]
  annotation_number = 0
-
+
  for s in lines:
-
+
  if len(s.strip()) == 0:
  continue
-
+
  tokens = s.split()
  assert len(tokens) == 5
  category_id = int(tokens[0])
@@ -107,35 +108,35 @@ def _process_image(fn_abs,input_folder,category_id_to_name,label_folder):
  ann['image_id'] = im['id']
  ann['category_id'] = category_id
  ann['sequence_level_annotation'] = False
-
+
  # COCO: [x_min, y_min, width, height] in absolute coordinates
  # YOLO: [class, x_center, y_center, width, height] in normalized coordinates
-
+
  yolo_bbox = [float(x) for x in tokens[1:]]
-
+
  normalized_x_center = yolo_bbox[0]
  normalized_y_center = yolo_bbox[1]
  normalized_width = yolo_bbox[2]
  normalized_height = yolo_bbox[3]
-
- absolute_x_center = normalized_x_center * im_width
+
+ absolute_x_center = normalized_x_center * im_width
  absolute_y_center = normalized_y_center * im_height
  absolute_width = normalized_width * im_width
  absolute_height = normalized_height * im_height
  absolute_x_min = absolute_x_center - absolute_width / 2
  absolute_y_min = absolute_y_center - absolute_height / 2
-
+
  coco_bbox = [absolute_x_min, absolute_y_min, absolute_width, absolute_height]
-
+
  ann['bbox'] = coco_bbox
  annotation_number += 1
-
- annotations_this_image.append(ann)
-
- # ...for each annotation
-
+
+ annotations_this_image.append(ann)
+
+ # ...for each annotation
+
  # ...if this image has annotations
-
+
  return (im,annotations_this_image)
 
  # ...def _process_image(...)
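For reference, the YOLO-to-COCO box conversion in the hunk above can be reproduced in isolation. The following is a minimal sketch, not part of the package; the label line and image size are hypothetical.

# Sketch of the YOLO -> COCO box conversion performed in _process_image()
line = '0 0.50 0.50 0.25 0.40'             # class x_center y_center width height (normalized)
im_width, im_height = 640, 480             # hypothetical image size in pixels
tokens = line.split()
x_center, y_center, width, height = [float(t) for t in tokens[1:]]
abs_width = width * im_width               # 160.0
abs_height = height * im_height            # 192.0
abs_x_min = x_center * im_width - abs_width / 2      # 240.0
abs_y_min = y_center * im_height - abs_height / 2    # 144.0
coco_bbox = [abs_x_min, abs_y_min, abs_width, abs_height]   # [240.0, 144.0, 160.0, 192.0]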
@@ -144,37 +145,37 @@ def _process_image(fn_abs,input_folder,category_id_to_name,label_folder):
  def load_yolo_class_list(class_name_file):
  """
  Loads a dictionary mapping zero-indexed IDs to class names from the text/yaml file
- [class_name_file].
-
+ [class_name_file].
+
  Args:
  class_name_file (str or list): this can be:
  - a .yaml or .yaml file in YOLO's dataset.yaml format
  - a .txt or .data file containing a flat list of class names
  - a list of class names
-
+
  Returns:
  dict: A dict mapping zero-indexed integer IDs to class names
  """
-
+
  # class_name_file can also be a list of class names
  if isinstance(class_name_file,list):
  category_id_to_name = {}
  for i_name,name in enumerate(class_name_file):
  category_id_to_name[i_name] = name
  return category_id_to_name
-
+
  ext = os.path.splitext(class_name_file)[1][1:]
  assert ext in ('yml','txt','yaml','data'), 'Unrecognized class name file type {}'.format(
  class_name_file)
-
+
  if ext in ('txt','data'):
-
+
  with open(class_name_file,'r') as f:
  lines = f.readlines()
  assert len(lines) > 0, 'Empty class name file {}'.format(class_name_file)
  class_names = [s.strip() for s in lines]
  assert len(lines[0]) > 0, 'Empty class name file {} (empty first line)'.format(class_name_file)
-
+
  # Blank lines should only appear at the end
  b_found_blank = False
  for s in lines:
@@ -183,17 +184,17 @@ def load_yolo_class_list(class_name_file):
  elif b_found_blank:
  raise ValueError('Invalid class name file {}, non-blank line after the last blank line'.format(
  class_name_file))
-
- category_id_to_name = {}
+
+ category_id_to_name = {}
  for i_category_id,category_name in enumerate(class_names):
  assert len(category_name) > 0
  category_id_to_name[i_category_id] = category_name
-
+
  else:
-
+
  assert ext in ('yml','yaml')
  category_id_to_name = read_classes_from_yolo_dataset_file(class_name_file)
-
+
  return category_id_to_name
 
  # ...load_yolo_class_list(...)
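As an aside, load_yolo_class_list() accepts a plain Python list as well as a .txt/.data/.yml/.yaml file, and always returns a dict keyed by zero-indexed category ID. A hypothetical example (the class names are illustrative only):

from megadetector.data_management.yolo_to_coco import load_yolo_class_list

category_id_to_name = load_yolo_class_list(['animal', 'person', 'vehicle'])
# -> {0: 'animal', 1: 'person', 2: 'vehicle'}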
@@ -202,91 +203,91 @@ def load_yolo_class_list(class_name_file):
  def validate_label_file(label_file,category_id_to_name=None,verbose=False):
  """"
  Verifies that [label_file] is a valid YOLO label file. Does not check the extension.
-
+
  Args:
  label_file (str): the .txt file to validate
  category_id_to_name (dict, optional): a dict mapping integer category IDs to names;
  if this is not None, this function errors if the file uses a category that's not
  in this dict
  verbose (bool, optional): enable additional debug console output
-
+
  Returns:
- dict: a dict with keys 'file' (the same as [label_file]) and 'errors' (a list of
+ dict: a dict with keys 'file' (the same as [label_file]) and 'errors' (a list of
  errors (if any) that we found in this file)
  """
-
+
  label_result = {}
  label_result['file'] = label_file
  label_result['errors'] = []
-
+
  try:
  with open(label_file,'r') as f:
  lines = f.readlines()
  except Exception as e:
  label_result['errors'].append('Read error: {}'.format(str(e)))
  return label_result
-
+
  # i_line 0; line = lines[i_line]
  for i_line,line in enumerate(lines):
  s = line.strip()
  if len(s) == 0 or s[0] == '#':
  continue
-
+
  try:
-
+
  tokens = s.split()
- assert len(tokens) == 5, '{} tokens'.format(len(tokens))
-
+ assert len(tokens) == 5, '{} tokens'.format(len(tokens))
+
  if category_id_to_name is not None:
  category_id = int(tokens[0])
  assert category_id in category_id_to_name, \
  'Unrecognized category ID {}'.format(category_id)
-
+
  yolo_bbox = [float(x) for x in tokens[1:]]
-
+
  except Exception as e:
  label_result['errors'].append('Token error at line {}: {}'.format(i_line,str(e)))
  continue
-
+
  normalized_x_center = yolo_bbox[0]
  normalized_y_center = yolo_bbox[1]
  normalized_width = yolo_bbox[2]
  normalized_height = yolo_bbox[3]
-
+
  normalized_x_min = normalized_x_center - normalized_width / 2.0
  normalized_x_max = normalized_x_center + normalized_width / 2.0
  normalized_y_min = normalized_y_center - normalized_height / 2.0
  normalized_y_max = normalized_y_center + normalized_height / 2.0
-
+
  if normalized_x_min < 0 or normalized_y_min < 0 or \
  normalized_x_max > 1 or normalized_y_max > 1:
  label_result['errors'].append('Invalid bounding box: {} {} {} {}'.format(
  normalized_x_min,normalized_y_min,normalized_x_max,normalized_y_max))
-
+
  # ...for each line
-
+
  if verbose:
  if len(label_result['errors']) > 0:
  print('Errors for {}:'.format(label_file))
  for error in label_result['errors']:
  print(error)
-
+
  return label_result
-
+
  # ...def validate_label_file(...)
 
-
- def validate_yolo_dataset(input_folder,
- class_name_file,
- n_workers=1,
- pool_type='thread',
+
+ def validate_yolo_dataset(input_folder,
+ class_name_file,
+ n_workers=1,
+ pool_type='thread',
  verbose=False):
  """
- Verifies all the labels in a YOLO dataset folder. Does not yet support the case where the
+ Verifies all the labels in a YOLO dataset folder. Does not yet support the case where the
  labels and images are in different folders (yolo_to_coco() supports this).
-
+
  Looks for:
-
+
  * Image files without label files
  * Text files without image files
  * Illegal classes in label files
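As context for validate_label_file(): each non-comment line of a YOLO label file must contain five whitespace-separated tokens, and the box must stay inside the unit square once the normalized center/size values are expanded to min/max. Hypothetical label lines illustrating the checks:

'0 0.50 0.50 0.25 0.40'   # passes: 5 tokens, box stays within [0,1]
'0 0.95 0.50 0.20 0.40'   # fails: x_max = 0.95 + 0.10 = 1.05 > 1
'0 0.50 0.50 0.25'        # fails: only 4 tokens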
@@ -294,103 +295,109 @@ def validate_yolo_dataset(input_folder,
 
  Args:
  input_folder (str): the YOLO dataset folder to validate
- class_name_file (str or list): a list of classes, a flat text file, or a yolo
- dataset.yml/.yaml file. If it's a dataset.yml file, that file should point to
+ class_name_file (str or list): a list of classes, a flat text file, or a yolo
+ dataset.yml/.yaml file. If it's a dataset.yml file, that file should point to
  input_folder as the base folder, though this is not explicitly checked.
  n_workers (int, optional): number of concurrent workers, set to <= 1 to disable
  parallelization
  pool_type (str, optional): 'thread' or 'process', worker type to use for parallelization;
  not used if [n_workers] <= 1
  verbose (bool, optional): enable additional debug console output
-
+
  Returns:
- dict: validation results, as a dict with fields:
-
+ dict: validation results, as a dict with fields:
+
  - image_files_without_label_files (list)
  - label_files_without_image_files (list)
  - label_results (list of dicts with field 'filename', 'errors') (list)
  """
-
+
  # Validate arguments
  assert os.path.isdir(input_folder), 'Could not find input folder {}'.format(input_folder)
  if n_workers > 1:
  assert pool_type in ('thread','process'), 'Illegal pool type {}'.format(pool_type)
-
+
  category_id_to_name = load_yolo_class_list(class_name_file)
-
+
  print('Enumerating files in {}'.format(input_folder))
-
+
  all_files = recursive_file_list(input_folder,recursive=True,return_relative_paths=False,
  convert_slashes=True)
  label_files = [fn for fn in all_files if fn.endswith('.txt')]
  image_files = find_image_strings(all_files)
  print('Found {} images files and {} label files in {}'.format(
  len(image_files),len(label_files),input_folder))
-
+
  label_files_set = set(label_files)
-
+
  image_files_without_extension = set()
  for fn in image_files:
  image_file_without_extension = os.path.splitext(fn)[0]
  assert image_file_without_extension not in image_files_without_extension, \
  'Duplicate image file, likely with different extensions: {}'.format(fn)
  image_files_without_extension.add(image_file_without_extension)
-
+
  print('Looking for missing image/label files')
-
+
  image_files_without_label_files = []
  label_files_without_images = []
-
+
  for image_file in tqdm(image_files):
  expected_label_file = os.path.splitext(image_file)[0] + '.txt'
  if expected_label_file not in label_files_set:
  image_files_without_label_files.append(image_file)
-
+
  for label_file in tqdm(label_files):
  expected_image_file_without_extension = os.path.splitext(label_file)[0]
  if expected_image_file_without_extension not in image_files_without_extension:
  label_files_without_images.append(label_file)
-
+
  print('Found {} image files without labels, {} labels without images'.format(
  len(image_files_without_label_files),len(label_files_without_images)))
 
  print('Validating label files')
-
+
  if n_workers <= 1:
-
- label_results = []
- for fn_abs in tqdm(label_files):
+
+ label_results = []
+ for fn_abs in tqdm(label_files):
  label_results.append(validate_label_file(fn_abs,
  category_id_to_name=category_id_to_name,
  verbose=verbose))
-
+
  else:
-
+
  assert pool_type in ('process','thread'), 'Illegal pool type {}'.format(pool_type)
-
- if pool_type == 'thread':
- pool = ThreadPool(n_workers)
- else:
- pool = Pool(n_workers)
-
- print('Starting a {} pool of {} workers'.format(pool_type,n_workers))
-
- p = partial(validate_label_file,
- category_id_to_name=category_id_to_name,
- verbose=verbose)
- label_results = list(tqdm(pool.imap(p, label_files),
- total=len(label_files)))
-
+
+ pool = None
+ try:
+ if pool_type == 'thread':
+ pool = ThreadPool(n_workers)
+ else:
+ pool = Pool(n_workers)
+
+ print('Starting a {} pool of {} workers'.format(pool_type,n_workers))
+
+ p = partial(validate_label_file,
+ category_id_to_name=category_id_to_name,
+ verbose=verbose)
+ label_results = list(tqdm(pool.imap(p, label_files),
+ total=len(label_files)))
+ finally:
+ pool.close()
+ pool.join()
+ print("Pool closed and joined for label file validation")
+
  assert len(label_results) == len(label_files)
-
+
  validation_results = {}
  validation_results['image_files_without_label_files'] = image_files_without_label_files
  validation_results['label_files_without_images'] = label_files_without_images
  validation_results['label_results'] = label_results
-
+
  return validation_results
-
- # ...validate_yolo_dataset(...)
+
+ # ...validate_yolo_dataset(...)
 
 
  #%% Main conversion function
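The functional change in this hunk is that 5.0.29 closes and joins the worker pool in a finally block, so workers are released even if label validation raises. A minimal standalone sketch of that pattern follows; the helper name and the None guard are ours, not the package's.

from functools import partial
from multiprocessing.pool import Pool, ThreadPool
from tqdm import tqdm

def _map_with_pool(func, items, n_workers=4, pool_type='thread', **kwargs):
    # Map func over items with a worker pool, guaranteeing cleanup on error
    pool = None
    try:
        pool = ThreadPool(n_workers) if pool_type == 'thread' else Pool(n_workers)
        return list(tqdm(pool.imap(partial(func, **kwargs), items), total=len(items)))
    finally:
        if pool is not None:
            pool.close()
            pool.join()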
@@ -411,32 +418,32 @@ def yolo_to_coco(input_folder,
  label_folder=None):
  """
  Converts a YOLO-formatted dataset to a COCO-formatted dataset.
-
- All images will be assigned an "error" value, usually None.
-
+
+ All images will be assigned an "error" value, usually None.
+
  Args:
- input_folder (str): the YOLO dataset folder to convert. If the image and label
+ input_folder (str): the YOLO dataset folder to convert. If the image and label
  folders are different, this is the image folder, and [label_folder] is the
  label folder.
- class_name_file (str or list): a list of classes, a flat text file, or a yolo
- dataset.yml/.yaml file. If it's a dataset.yml file, that file should point to
+ class_name_file (str or list): a list of classes, a flat text file, or a yolo
+ dataset.yml/.yaml file. If it's a dataset.yml file, that file should point to
  input_folder as the base folder, though this is not explicitly checked.
  output_file (str, optional): .json file to which we should write COCO .json data
  empty_image_handling (str, optional): how to handle images with no boxes; whether
- this includes images with no .txt files depending on the value of
+ this includes images with no .txt files depending on the value of
  [allow_images_without_label_files]. Can be:
-
+
  - 'no_annotations': include the image in the image list, with no annotations
  - 'empty_annotations': include the image in the image list, and add an annotation without
  any bounding boxes, using a category called [empty_image_category_name].
  - 'skip': don't include the image in the image list
- - 'error': there shouldn't be any empty images
+ - 'error': there shouldn't be any empty images
  error_image_handling (str, optional): how to handle images that don't load properly; can
  be:
-
+
  - 'skip': don't include the image at all
  - 'no_annotations': include with no annotations
-
+
  n_workers (int, optional): number of concurrent workers, set to <= 1 to disable
  parallelization
  pool_type (str, optional): 'thread' or 'process', worker type to use for parallelization;
@@ -444,27 +451,27 @@ def yolo_to_coco(input_folder,
  recursive (bool, optional): whether to recurse into [input_folder]
  exclude_string (str, optional): exclude any images whose filename contains a string
  include_string (str, optional): include only images whose filename contains a string
- overwrite_handling (bool, optional): behavior if output_file exists ('load', 'overwrite', or
+ overwrite_handling (bool, optional): behavior if output_file exists ('load', 'overwrite', or
  'error')
  label_folder (str, optional): label folder, if different from the image folder
-
+
  Returns:
  dict: COCO-formatted data, the same as what's written to [output_file]
  """
-
+
  ## Validate input
-
+
  input_folder = input_folder.replace('\\','/')
-
+
  assert os.path.isdir(input_folder)
  assert os.path.isfile(class_name_file)
-
+
  assert empty_image_handling in \
  ('no_annotations','empty_annotations','skip','error'), \
  'Unrecognized empty image handling spec: {}'.format(empty_image_handling)
-
+
  if (output_file is not None) and os.path.isfile(output_file):
-
+
  if overwrite_handling == 'overwrite':
  print('Warning: output file {} exists, over-writing'.format(output_file))
  elif overwrite_handling == 'load':
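Putting the documented options together, a usage sketch for the converter; the paths are hypothetical, and the parameter names are taken from the docstring above.

from megadetector.data_management.yolo_to_coco import yolo_to_coco

coco_data = yolo_to_coco('/data/yolo-dataset',              # image folder (hypothetical)
                         '/data/yolo-dataset/classes.txt',  # flat class-name file
                         output_file='/data/yolo-dataset.json',
                         empty_image_handling='no_annotations',
                         error_image_handling='skip',
                         n_workers=4,
                         pool_type='thread',
                         overwrite_handling='overwrite')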
@@ -476,62 +483,62 @@ def yolo_to_coco(input_folder,
  raise ValueError('Output file {} exists'.format(output_file))
  else:
  raise ValueError('Unrecognized overwrite_handling value: {}'.format(overwrite_handling))
-
-
+
+
  ## Read class names
-
+
  category_id_to_name = load_yolo_class_list(class_name_file)
-
-
+
+
  # Find or create the empty image category, if necessary
  empty_category_id = None
-
+
  if (empty_image_handling == 'empty_annotations'):
  category_name_to_id = invert_dictionary(category_id_to_name)
  if empty_image_category_name in category_name_to_id:
  empty_category_id = category_name_to_id[empty_image_category_name]
  print('Using existing empty image category with name {}, ID {}'.format(
- empty_image_category_name,empty_category_id))
+ empty_image_category_name,empty_category_id))
  else:
  empty_category_id = len(category_id_to_name)
  print('Adding an empty category with name {}, ID {}'.format(
  empty_image_category_name,empty_category_id))
  category_id_to_name[empty_category_id] = empty_image_category_name
-
-
+
+
  ## Enumerate images
-
+
  print('Enumerating images...')
-
+
  image_files_abs = find_images(input_folder,recursive=recursive,convert_slashes=True)
  assert not any(['\\' in fn for fn in image_files_abs])
 
  n_files_original = len(image_files_abs)
-
+
  # Optionally include/exclude images matching specific strings
  if exclude_string is not None:
  image_files_abs = [fn for fn in image_files_abs if exclude_string not in fn]
  if include_string is not None:
  image_files_abs = [fn for fn in image_files_abs if include_string in fn]
-
+
  if len(image_files_abs) != n_files_original or exclude_string is not None or include_string is not None:
  n_excluded = n_files_original - len(image_files_abs)
  print('Excluded {} of {} images based on filenames'.format(n_excluded,n_files_original))
-
+
  categories = []
-
+
  for category_id in category_id_to_name:
  categories.append({'id':category_id,'name':category_id_to_name[category_id]})
-
+
  info = {}
  info['version'] = '1.0'
  info['description'] = 'Converted from YOLO format'
-
+
  image_ids = set()
-
-
+
+
  ## If we're expected to have labels for every image, check before we process all the images
-
+
  if not allow_images_without_label_files:
  print('Verifying that label files exist')
  # image_file_abs = image_files_abs[0]
@@ -544,88 +551,88 @@ def yolo_to_coco(input_folder,
  label_file_abs = os.path.splitext(label_file_abs_base)[0] + '.txt'
  assert os.path.isfile(label_file_abs), \
  'No annotation file for {}'.format(image_file_abs)
-
-
+
+
  ## Initial loop to make sure image IDs will be unique
-
+
  print('Validating image IDs...')
-
+
  for fn_abs in tqdm(image_files_abs):
-
+
  fn_relative = os.path.relpath(fn_abs,input_folder).replace('\\','/')
  image_id = _filename_to_image_id(fn_relative)
  assert image_id not in image_ids, \
  'Oops, you have hit a very esoteric case where you have the same filename ' + \
  'with both spaces and underscores, this is not currently handled.'
  image_ids.add(image_id)
-
-
+
+
  ## Main loop to process labels
-
+
  print('Processing labels...')
-
+
  if n_workers <= 1:
-
- image_results = []
+
+ image_results = []
  # fn_abs = image_files_abs[0]
- for fn_abs in tqdm(image_files_abs):
+ for fn_abs in tqdm(image_files_abs):
  image_results.append(_process_image(fn_abs,
  input_folder,
  category_id_to_name,
  label_folder))
-
+
  else:
-
+
  assert pool_type in ('process','thread'), 'Illegal pool type {}'.format(pool_type)
-
+
  if pool_type == 'thread':
  pool = ThreadPool(n_workers)
  else:
  pool = Pool(n_workers)
-
+
  print('Starting a {} pool of {} workers'.format(pool_type,n_workers))
-
+
  p = partial(_process_image,
  input_folder=input_folder,
  category_id_to_name=category_id_to_name,
  label_folder=label_folder)
  image_results = list(tqdm(pool.imap(p, image_files_abs),
  total=len(image_files_abs)))
-
-
+
+
  assert len(image_results) == len(image_files_abs)
-
-
+
+
  ## Re-assembly of results into a COCO dict
-
+
  print('Assembling labels...')
-
+
  images = []
  annotations = []
-
+
  for image_result in tqdm(image_results):
-
+
  im = image_result[0]
  annotations_this_image = image_result[1]
-
+
  # If we have annotations for this image
  if len(annotations_this_image) > 0:
  assert im['error'] is None
  images.append(im)
  for ann in annotations_this_image:
  annotations.append(ann)
-
+
  # If this image failed to read
  elif im['error'] is not None:
-
+
  if error_image_handling == 'skip':
  pass
  elif error_image_handling == 'no_annotations':
- images.append(im)
-
+ images.append(im)
+
  # If this image read successfully, but there are no annotations
  else:
-
+
  if empty_image_handling == 'skip':
  pass
  elif empty_image_handling == 'no_annotations':
@@ -641,13 +648,13 @@ def yolo_to_coco(input_folder,
  # we're adopting.
  # ann['bbox'] = [0,0,0,0]
  annotations.append(ann)
- images.append(im)
-
+ images.append(im)
+
  # ...for each image result
-
+
  print('Read {} annotations for {} images'.format(len(annotations),
  len(images)))
-
+
  d = {}
  d['images'] = images
  d['annotations'] = annotations
@@ -667,25 +674,25 @@ def yolo_to_coco(input_folder,
  #%% Interactive driver
 
  if False:
-
+
  pass
 
  #%% Convert YOLO folders to COCO
-
+
  preview_folder = '/home/user/data/noaa-fish/val-coco-conversion-preview'
  input_folder = '/home/user/data/noaa-fish/val'
  output_file = '/home/user/data/noaa-fish/val.json'
  class_name_file = '/home/user/data/noaa-fish/AllImagesWithAnnotations/classes.txt'
 
  d = yolo_to_coco(input_folder,class_name_file,output_file)
-
+
  input_folder = '/home/user/data/noaa-fish/train'
  output_file = '/home/user/data/noaa-fish/train.json'
  class_name_file = '/home/user/data/noaa-fish/AllImagesWithAnnotations/classes.txt'
 
  d = yolo_to_coco(input_folder,class_name_file,output_file)
-
-
+
+
  #%% Check DB integrity
 
  from megadetector.data_management.databases import integrity_check_json_db
@@ -715,7 +722,7 @@ if False:
  output_dir=preview_folder,
  image_base_dir=input_folder,
  options=viz_options)
-
+
  from megadetector.utils.path_utils import open_file
  open_file(html_output_file)
 