megadetector 5.0.7__py3-none-any.whl → 5.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (191) hide show
  1. api/__init__.py +0 -0
  2. api/batch_processing/__init__.py +0 -0
  3. api/batch_processing/api_core/__init__.py +0 -0
  4. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. api/batch_processing/api_core/batch_service/score.py +0 -1
  6. api/batch_processing/api_core/server_job_status_table.py +0 -1
  7. api/batch_processing/api_core_support/__init__.py +0 -0
  8. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
  9. api/batch_processing/api_support/__init__.py +0 -0
  10. api/batch_processing/api_support/summarize_daily_activity.py +0 -1
  11. api/batch_processing/data_preparation/__init__.py +0 -0
  12. api/batch_processing/data_preparation/manage_local_batch.py +93 -79
  13. api/batch_processing/data_preparation/manage_video_batch.py +8 -8
  14. api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
  15. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
  16. api/batch_processing/postprocessing/__init__.py +0 -0
  17. api/batch_processing/postprocessing/add_max_conf.py +12 -12
  18. api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
  19. api/batch_processing/postprocessing/combine_api_outputs.py +69 -55
  20. api/batch_processing/postprocessing/compare_batch_results.py +114 -44
  21. api/batch_processing/postprocessing/convert_output_format.py +62 -19
  22. api/batch_processing/postprocessing/load_api_results.py +17 -20
  23. api/batch_processing/postprocessing/md_to_coco.py +31 -21
  24. api/batch_processing/postprocessing/md_to_labelme.py +165 -68
  25. api/batch_processing/postprocessing/merge_detections.py +40 -15
  26. api/batch_processing/postprocessing/postprocess_batch_results.py +270 -186
  27. api/batch_processing/postprocessing/remap_detection_categories.py +170 -0
  28. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +75 -39
  29. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
  30. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
  31. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +244 -160
  32. api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
  33. api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
  34. api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
  35. api/synchronous/__init__.py +0 -0
  36. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  37. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
  38. api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
  39. api/synchronous/api_core/animal_detection_api/config.py +35 -35
  40. api/synchronous/api_core/tests/__init__.py +0 -0
  41. api/synchronous/api_core/tests/load_test.py +109 -109
  42. classification/__init__.py +0 -0
  43. classification/aggregate_classifier_probs.py +21 -24
  44. classification/analyze_failed_images.py +11 -13
  45. classification/cache_batchapi_outputs.py +51 -51
  46. classification/create_classification_dataset.py +69 -68
  47. classification/crop_detections.py +54 -53
  48. classification/csv_to_json.py +97 -100
  49. classification/detect_and_crop.py +105 -105
  50. classification/evaluate_model.py +43 -42
  51. classification/identify_mislabeled_candidates.py +47 -46
  52. classification/json_to_azcopy_list.py +10 -10
  53. classification/json_validator.py +72 -71
  54. classification/map_classification_categories.py +44 -43
  55. classification/merge_classification_detection_output.py +68 -68
  56. classification/prepare_classification_script.py +157 -154
  57. classification/prepare_classification_script_mc.py +228 -228
  58. classification/run_classifier.py +27 -26
  59. classification/save_mislabeled.py +30 -30
  60. classification/train_classifier.py +20 -20
  61. classification/train_classifier_tf.py +21 -22
  62. classification/train_utils.py +10 -10
  63. data_management/__init__.py +0 -0
  64. data_management/annotations/__init__.py +0 -0
  65. data_management/annotations/annotation_constants.py +18 -31
  66. data_management/camtrap_dp_to_coco.py +238 -0
  67. data_management/cct_json_utils.py +107 -59
  68. data_management/cct_to_md.py +176 -158
  69. data_management/cct_to_wi.py +247 -219
  70. data_management/coco_to_labelme.py +272 -0
  71. data_management/coco_to_yolo.py +86 -62
  72. data_management/databases/__init__.py +0 -0
  73. data_management/databases/add_width_and_height_to_db.py +20 -16
  74. data_management/databases/combine_coco_camera_traps_files.py +35 -31
  75. data_management/databases/integrity_check_json_db.py +130 -83
  76. data_management/databases/subset_json_db.py +25 -16
  77. data_management/generate_crops_from_cct.py +27 -45
  78. data_management/get_image_sizes.py +188 -144
  79. data_management/importers/add_nacti_sizes.py +8 -8
  80. data_management/importers/add_timestamps_to_icct.py +78 -78
  81. data_management/importers/animl_results_to_md_results.py +158 -160
  82. data_management/importers/auckland_doc_test_to_json.py +9 -9
  83. data_management/importers/auckland_doc_to_json.py +8 -8
  84. data_management/importers/awc_to_json.py +7 -7
  85. data_management/importers/bellevue_to_json.py +15 -15
  86. data_management/importers/cacophony-thermal-importer.py +13 -13
  87. data_management/importers/carrizo_shrubfree_2018.py +8 -8
  88. data_management/importers/carrizo_trail_cam_2017.py +8 -8
  89. data_management/importers/cct_field_adjustments.py +9 -9
  90. data_management/importers/channel_islands_to_cct.py +10 -10
  91. data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
  92. data_management/importers/ena24_to_json.py +7 -7
  93. data_management/importers/filenames_to_json.py +8 -8
  94. data_management/importers/helena_to_cct.py +7 -7
  95. data_management/importers/idaho-camera-traps.py +7 -7
  96. data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
  97. data_management/importers/jb_csv_to_json.py +9 -9
  98. data_management/importers/mcgill_to_json.py +8 -8
  99. data_management/importers/missouri_to_json.py +18 -18
  100. data_management/importers/nacti_fieldname_adjustments.py +10 -10
  101. data_management/importers/noaa_seals_2019.py +8 -8
  102. data_management/importers/pc_to_json.py +7 -7
  103. data_management/importers/plot_wni_giraffes.py +7 -7
  104. data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
  105. data_management/importers/prepare_zsl_imerit.py +7 -7
  106. data_management/importers/rspb_to_json.py +8 -8
  107. data_management/importers/save_the_elephants_survey_A.py +8 -8
  108. data_management/importers/save_the_elephants_survey_B.py +9 -9
  109. data_management/importers/snapshot_safari_importer.py +26 -26
  110. data_management/importers/snapshot_safari_importer_reprise.py +665 -665
  111. data_management/importers/snapshot_serengeti_lila.py +14 -14
  112. data_management/importers/sulross_get_exif.py +8 -9
  113. data_management/importers/timelapse_csv_set_to_json.py +11 -11
  114. data_management/importers/ubc_to_json.py +13 -13
  115. data_management/importers/umn_to_json.py +7 -7
  116. data_management/importers/wellington_to_json.py +8 -8
  117. data_management/importers/wi_to_json.py +9 -9
  118. data_management/importers/zamba_results_to_md_results.py +181 -181
  119. data_management/labelme_to_coco.py +309 -159
  120. data_management/labelme_to_yolo.py +103 -60
  121. data_management/lila/__init__.py +0 -0
  122. data_management/lila/add_locations_to_island_camera_traps.py +9 -9
  123. data_management/lila/add_locations_to_nacti.py +147 -147
  124. data_management/lila/create_lila_blank_set.py +114 -31
  125. data_management/lila/create_lila_test_set.py +8 -8
  126. data_management/lila/create_links_to_md_results_files.py +106 -106
  127. data_management/lila/download_lila_subset.py +92 -90
  128. data_management/lila/generate_lila_per_image_labels.py +56 -43
  129. data_management/lila/get_lila_annotation_counts.py +18 -15
  130. data_management/lila/get_lila_image_counts.py +11 -11
  131. data_management/lila/lila_common.py +103 -70
  132. data_management/lila/test_lila_metadata_urls.py +132 -116
  133. data_management/ocr_tools.py +173 -128
  134. data_management/read_exif.py +161 -99
  135. data_management/remap_coco_categories.py +84 -0
  136. data_management/remove_exif.py +58 -62
  137. data_management/resize_coco_dataset.py +32 -44
  138. data_management/wi_download_csv_to_coco.py +246 -0
  139. data_management/yolo_output_to_md_output.py +86 -73
  140. data_management/yolo_to_coco.py +535 -95
  141. detection/__init__.py +0 -0
  142. detection/detector_training/__init__.py +0 -0
  143. detection/process_video.py +85 -33
  144. detection/pytorch_detector.py +43 -25
  145. detection/run_detector.py +157 -72
  146. detection/run_detector_batch.py +189 -114
  147. detection/run_inference_with_yolov5_val.py +118 -51
  148. detection/run_tiled_inference.py +113 -42
  149. detection/tf_detector.py +51 -28
  150. detection/video_utils.py +606 -521
  151. docs/source/conf.py +43 -0
  152. md_utils/__init__.py +0 -0
  153. md_utils/azure_utils.py +9 -9
  154. md_utils/ct_utils.py +249 -70
  155. md_utils/directory_listing.py +59 -64
  156. md_utils/md_tests.py +968 -862
  157. md_utils/path_utils.py +655 -155
  158. md_utils/process_utils.py +157 -133
  159. md_utils/sas_blob_utils.py +20 -20
  160. md_utils/split_locations_into_train_val.py +45 -32
  161. md_utils/string_utils.py +33 -10
  162. md_utils/url_utils.py +208 -27
  163. md_utils/write_html_image_list.py +51 -35
  164. md_visualization/__init__.py +0 -0
  165. md_visualization/plot_utils.py +102 -109
  166. md_visualization/render_images_with_thumbnails.py +34 -34
  167. md_visualization/visualization_utils.py +908 -311
  168. md_visualization/visualize_db.py +109 -58
  169. md_visualization/visualize_detector_output.py +61 -42
  170. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/METADATA +21 -17
  171. megadetector-5.0.9.dist-info/RECORD +224 -0
  172. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/WHEEL +1 -1
  173. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
  174. taxonomy_mapping/__init__.py +0 -0
  175. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
  176. taxonomy_mapping/map_new_lila_datasets.py +154 -154
  177. taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
  178. taxonomy_mapping/preview_lila_taxonomy.py +591 -591
  179. taxonomy_mapping/retrieve_sample_image.py +12 -12
  180. taxonomy_mapping/simple_image_download.py +11 -11
  181. taxonomy_mapping/species_lookup.py +10 -10
  182. taxonomy_mapping/taxonomy_csv_checker.py +18 -18
  183. taxonomy_mapping/taxonomy_graph.py +47 -47
  184. taxonomy_mapping/validate_lila_category_mappings.py +83 -76
  185. data_management/cct_json_to_filename_json.py +0 -89
  186. data_management/cct_to_csv.py +0 -140
  187. data_management/databases/remove_corrupted_images_from_db.py +0 -191
  188. detection/detector_training/copy_checkpoints.py +0 -43
  189. md_visualization/visualize_megadb.py +0 -183
  190. megadetector-5.0.7.dist-info/RECORD +0 -202
  191. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0
@@ -0,0 +1,272 @@
1
+ """
2
+
3
+ coco_to_labelme.py
4
+
5
+ Converts a COCO dataset to labelme format (one .json per image file).
6
+
7
+ If you want to convert YOLO-formatted data to labelme format, use yolo_to_coco, then
8
+ coco_to_labelme.
9
+
10
+ """
11
+
12
+ #%% Imports and constants
13
+
14
+ import os
15
+ import json
16
+
17
+ from tqdm import tqdm
18
+ from collections import defaultdict
19
+
20
+ from md_visualization.visualization_utils import open_image
21
+
22
+
23
+ #%% Functions
24
+
25
+ def get_labelme_dict_for_image_from_coco_record(im,annotations,categories,info=None):
26
+ """
27
+ For the given image struct in COCO format and associated list of annotations, reformats the
28
+ detections into labelme format.
29
+
30
+ Args:
31
+ im (dict): image dict, as loaded from a COCO .json file; 'height' and 'width' are required
32
+ annotations (list): a list of annotations that refer to this image (this function errors if
33
+ that's not the case)
34
+ categories (list): a list of category dicts in COCO format ({'id':x,'name':'s'})
35
+ info (dict, optional): a dict to store in a non-standard "custom_info" field in the output
36
+
37
+ Returns:
38
+ dict: a dict in labelme format, suitable for writing to a labelme .json file
39
+ """
40
+
41
+ image_base_name = os.path.basename(im['file_name'])
42
+
43
+ output_dict = {}
44
+ if info is not None:
45
+ output_dict['custom_info'] = info
46
+ output_dict['version'] = '5.3.0a0'
47
+ output_dict['flags'] = {}
48
+ output_dict['shapes'] = []
49
+ output_dict['imagePath'] = image_base_name
50
+ output_dict['imageHeight'] = im['height']
51
+ output_dict['imageWidth'] = im['width']
52
+ output_dict['imageData'] = None
53
+
54
+ # Store COCO categories in case we want to reconstruct the original IDs later
55
+ output_dict['coco_categories'] = categories
56
+
57
+ category_id_to_name = {c['id']:c['name'] for c in categories}
58
+
59
+ if 'flags' in im:
60
+ output_dict['flags'] = im['flags']
61
+
62
+ # ann = annotations[0]
63
+ for ann in annotations:
64
+
65
+ assert ann['image_id'] == im['id'], 'Annotation {} does not refer to image {}'.format(
66
+ ann['id'],im['id'])
67
+
68
+ if 'bbox' not in ann:
69
+ continue
70
+
71
+ shape = {}
72
+ shape['label'] = category_id_to_name[ann['category_id']]
73
+ shape['shape_type'] = 'rectangle'
74
+ shape['description'] = ''
75
+ shape['group_id'] = None
76
+
77
+ # COCO boxes are [x_min, y_min, width_of_box, height_of_box] (absolute)
78
+ #
79
+ # labelme boxes are [[x0,y0],[x1,y1]] (absolute)
80
+ x0 = ann['bbox'][0]
81
+ y0 = ann['bbox'][1]
82
+ x1 = ann['bbox'][0] + ann['bbox'][2]
83
+ y1 = ann['bbox'][1] + ann['bbox'][3]
84
+
85
+ shape['points'] = [[x0,y0],[x1,y1]]
86
+ output_dict['shapes'].append(shape)
87
+
88
+ # ...for each annotation
89
+
90
+ return output_dict
91
+
92
+ # ...def get_labelme_dict_for_image_from_coco_record()
93
+
94
+
95
+ def coco_to_labelme(coco_data,image_base,overwrite=False,bypass_image_size_check=False,verbose=False):
96
+ """
97
+ For all the images in [coco_data] (a dict or a filename), write a .json file in
98
+ labelme format alongside the corresponding relative path within image_base.
99
+ """
100
+
101
+ # Load COCO data if necessary
102
+ if isinstance(coco_data,str):
103
+ with open(coco_data,'r') as f:
104
+ coco_data = json.load(f)
105
+ assert isinstance(coco_data,dict)
106
+
107
+
108
+ ## Read image sizes if necessary
109
+
110
+ if bypass_image_size_check:
111
+
112
+ print('Bypassing size check')
113
+
114
+ else:
115
+
116
+ # TODO: parallelize this loop
117
+
118
+ print('Reading/validating image sizes...')
119
+
120
+ # im = coco_data['images'][0]
121
+ for im in tqdm(coco_data['images']):
122
+
123
+ # Make sure this file exists
124
+ im_full_path = os.path.join(image_base,im['file_name'])
125
+ assert os.path.isfile(im_full_path), 'Image file {} does not exist'.format(im_full_path)
126
+
127
+ # Load w/h information if necessary
128
+ if 'height' not in im or 'width' not in im:
129
+
130
+ try:
131
+ pil_im = open_image(im_full_path)
132
+ im['width'] = pil_im.width
133
+ im['height'] = pil_im.height
134
+ except Exception:
135
+ print('Warning: cannot open image {}'.format(im_full_path))
136
+ if 'failure' not in im:
137
+ im['failure'] = 'Failure image access'
138
+
139
+ # ...if we need to read w/h information
140
+
141
+ # ...for each image
142
+
143
+ # ...if we need to load image sizes
144
+
145
+
146
+ ## Generate labelme files
147
+
148
+ print('Generating .json files...')
149
+
150
+ image_id_to_annotations = defaultdict(list)
151
+ for ann in coco_data['annotations']:
152
+ image_id_to_annotations[ann['image_id']].append(ann)
153
+
154
+ n_json_files_written = 0
155
+ n_json_files_error = 0
156
+ n_json_files_exist = 0
157
+
158
+ # Write output
159
+ for im in tqdm(coco_data['images']):
160
+
161
+ # Skip this image if it failed to load in whatever system generated this COCO file
162
+ skip_image = False
163
+
164
+ # Errors are represented differently depending on the source
165
+ for error_string in ('failure','error'):
166
+ if (error_string in im) and (im[error_string] is not None):
167
+ if verbose:
168
+ print('Warning: skipping labelme file generation for failed image {}'.format(
169
+ im['file_name']))
170
+ skip_image = True
171
+ n_json_files_error += 1
172
+ break
173
+ if skip_image:
174
+ continue
175
+
176
+ im_full_path = os.path.join(image_base,im['file_name'])
177
+ json_path = os.path.splitext(im_full_path)[0] + '.json'
178
+
179
+ if (not overwrite) and (os.path.isfile(json_path)):
180
+ if verbose:
181
+ print('Skipping existing file {}'.format(json_path))
182
+ n_json_files_exist += 1
183
+ continue
184
+
185
+ annotations_this_image = image_id_to_annotations[im['id']]
186
+ output_dict = get_labelme_dict_for_image_from_coco_record(im,
187
+ annotations_this_image,
188
+ coco_data['categories'],
189
+ info=None)
190
+
191
+ n_json_files_written += 1
192
+ with open(json_path,'w') as f:
193
+ json.dump(output_dict,f,indent=1)
194
+
195
+ # ...for each image
196
+
197
+ print('\nWrote {} .json files (skipped {} for errors, {} because they exist)'.format(
198
+ n_json_files_written,n_json_files_error,n_json_files_exist))
199
+
200
+ # ...def coco_to_labelme()
201
+
202
+
203
+ #%% Interactive driver
204
+
205
+ if False:
206
+
207
+ pass
208
+
209
+ #%% Configure options
210
+
211
+ coco_file = \
212
+ r'C:\\temp\\snapshot-exploration\\images\\training-images-good\\training-images-good_from_yolo.json'
213
+ image_folder = os.path.dirname(coco_file)
214
+ overwrite = True
215
+
216
+
217
+ #%% Programmatic execution
218
+
219
+ coco_to_labelme(coco_data=coco_file,image_base=image_folder,overwrite=overwrite)
220
+
221
+
222
+ #%% Command-line execution
223
+
224
+ s = 'python coco_to_labelme.py "{}" "{}"'.format(coco_file,image_folder)
225
+ if overwrite:
226
+ s += ' --overwrite'
227
+
228
+ print(s)
229
+ import clipboard; clipboard.copy(s)
230
+
231
+
232
+ #%% Opening labelme
233
+
234
+ s = 'python labelme {}'.format(image_folder)
235
+ print(s)
236
+ import clipboard; clipboard.copy(s)
237
+
238
+
239
+ #%% Command-line driver
240
+
241
+ import sys,argparse
242
+
243
+ def main():
244
+
245
+ parser = argparse.ArgumentParser(
246
+ description='Convert a COCO database to labelme annotation format')
247
+
248
+ parser.add_argument(
249
+ 'coco_file',
250
+ type=str,
251
+ help='Path to COCO data file (.json)')
252
+
253
+ parser.add_argument(
254
+ 'image_base',
255
+ type=str,
256
+ help='Path to images (also the output folder)')
257
+
258
+ parser.add_argument(
259
+ '--overwrite',
260
+ action='store_true',
261
+ help='Overwrite existing labelme .json files')
262
+
263
+ if len(sys.argv[1:]) == 0:
264
+ parser.print_help()
265
+ parser.exit()
266
+
267
+ args = parser.parse_args()
268
+
269
+ coco_to_labelme(coco_data=args.coco_file,image_base=args.image_base,overwrite=args.overwrite)
270
+
271
+ if __name__ == '__main__':
272
+ main()
@@ -1,20 +1,20 @@
1
- ########
2
- #
3
- # coco_to_yolo.py
4
- #
5
- # Converts a COCO-formatted dataset to a YOLO-formatted dataset, flattening
6
- # the dataset (to a single folder) in the process.
7
- #
8
- # If the input and output folders are the same, writes .txt files to the input folder,
9
- # and neither moves nor modifies images.
10
- #
11
- # Currently ignores segmentation masks, and errors if an annotation has a
12
- # segmentation polygon but no bbox.
13
- #
14
- # Has only been tested on a handful of COCO Camera Traps data sets; if you
15
- # use it for more general COCO conversion, YMMV.
16
- #
17
- ########
1
+ """
2
+
3
+ coco_to_yolo.py
4
+
5
+ Converts a COCO-formatted dataset to a YOLO-formatted dataset, flattening
6
+ the dataset (to a single folder) in the process.
7
+
8
+ If the input and output folders are the same, writes .txt files to the input folder,
9
+ and neither moves nor modifies images.
10
+
11
+ Currently ignores segmentation masks, and errors if an annotation has a
12
+ segmentation polygon but no bbox.
13
+
14
+ Has only been tested on a handful of COCO Camera Traps data sets; if you
15
+ use it for more general COCO conversion, YMMV.
16
+
17
+ """
18
18
 
19
19
  #%% Imports and constants
20
20
 
@@ -37,16 +37,16 @@ def write_yolo_dataset_file(yolo_dataset_file,
37
37
  val_folder_relative=None,
38
38
  test_folder_relative=None):
39
39
  """
40
- Write a YOLOv5 dataset.yaml file to the absolute path yolo_dataset_file (should
40
+ Write a YOLOv5 dataset.yaml file to the absolute path [yolo_dataset_file] (should
41
41
  have a .yaml extension, though it's only a warning if it doesn't).
42
-
43
- [dataset_base_dir] should be the absolute path of the dataset root.
44
42
 
45
- yolo_dataset_file does not have to be within dataset_base_dir.
46
-
47
- [class_list] can be an ordered list of class names (the first item will be class 0,
48
- etc.), or the name of a text file containing an ordered list of class names (one per
49
- line, starting from class zero).
43
+ Args:
44
+ yolo_dataset_file (str): the file, typically ending in .yaml or .yml, to write.
45
+ Does not have to be within dataset_base_dir.
46
+ dataset_base_dir (str): the absolute base path of the YOLO dataset
47
+ class_list (list or str): an ordered list of class names (the first item will be class 0,
48
+ etc.), or the name of a text file containing an ordered list of class names (one per
49
+ line, starting from class zero).
50
50
  """
51
51
 
52
52
  # Read class names
@@ -56,6 +56,10 @@ def write_yolo_dataset_file(yolo_dataset_file,
56
56
  class_lines = [s.strip() for s in class_lines]
57
57
  class_list = [s for s in class_lines if len(s) > 0]
58
58
 
59
+ if not (yolo_dataset_file.endswith('.yml') or yolo_dataset_file.endswith('.yaml')):
60
+ print('Warning: writing dataset file to a non-yml/yaml extension:\n{}'.format(
61
+ yolo_dataset_file))
62
+
59
63
  # Write dataset.yaml
60
64
  with open(yolo_dataset_file,'w') as f:
61
65
 
@@ -78,7 +82,9 @@ def write_yolo_dataset_file(yolo_dataset_file,
78
82
  # ...def write_yolo_dataset_file(...)
79
83
 
80
84
 
81
- def coco_to_yolo(input_image_folder,output_folder,input_file,
85
+ def coco_to_yolo(input_image_folder,
86
+ output_folder,
87
+ input_file,
82
88
  source_format='coco',
83
89
  overwrite_images=False,
84
90
  create_image_and_label_folders=False,
@@ -93,7 +99,7 @@ def coco_to_yolo(input_image_folder,output_folder,input_file,
93
99
  write_output=True,
94
100
  flatten_paths=True):
95
101
  """
96
- Convert a COCO-formatted dataset to a YOLO-formatted dataset, optionally flattening the
102
+ Converts a COCO-formatted dataset to a YOLO-formatted dataset, optionally flattening the
97
103
  dataset to a single folder in the process.
98
104
 
99
105
  If the input and output folders are the same, writes .txt files to the input folder,
@@ -102,32 +108,51 @@ def coco_to_yolo(input_image_folder,output_folder,input_file,
102
108
  Currently ignores segmentation masks, and errors if an annotation has a
103
109
  segmentation polygon but no bbox.
104
110
 
105
- source_format can be 'coco' (default) or 'coco_camera_traps'. The only difference
106
- is that when source_format is 'coco_camera_traps', we treat an image with a non-bbox
107
- annotation with a category id of 0 as a special case, i.e. that's how an empty image
108
- is indicated. The original COCO standard is a little ambiguous on this issue. If
109
- source_format is 'coco', we either treat images as empty or error, depending on the value
110
- of allow_empty_annotations. allow_empty_annotations has no effect if source_format is
111
- 'coco_camera_traps'.
112
-
113
- If create_image_and_label_folders is false, a/b/c/image001.jpg will become a#b#c#image001.jpg,
114
- and the corresponding text file will be a#b#c#image001.txt.
115
-
116
- If create_image_and_label_folders is true, a/b/c/image001.jpg will become
117
- images/a#b#c#image001.jpg, and the corresponding text file will be
118
- labels/a#b#c#image001.txt. Some tools still use this variant of the YOLO standard.
119
-
120
- If clip_boxes is True, bounding boxes coordinates will be clipped to [0,1].
121
-
122
- image_id_to_output_image_json_file is an optional *output* file, to which we will write
123
- a mapping from image IDs to output file names.
124
-
125
- images_to_exclude is a list of image files (relative paths in the input folder) that we
126
- should ignore.
127
-
128
- write_output determines whether we actually copy images and write annotations;
129
- setting this to False basically puts this function in "test mode". The class list
130
- file is written regardless of the value of write_output.
111
+ Args:
112
+ input_image_folder (str): the folder where images live; filenames in the COCO .json
113
+ file [input_file] should be relative to this folder
114
+ output_folder (str): the base folder for the YOLO dataset; can be the same as [input_image_folder],
115
+ in which case .txt files are written there and images are left alone
116
+ input_file (str): a .json file in COCO format
117
+ source_format (str, optional): can be 'coco' (default) or 'coco_camera_traps'. The only difference
118
+ is that when source_format is 'coco_camera_traps', we treat an image with a non-bbox
119
+ annotation with a category id of 0 as a special case, i.e. that's how an empty image
120
+ is indicated. The original COCO standard is a little ambiguous on this issue. If
121
+ source_format is 'coco', we either treat images as empty or error, depending on the value
122
+ of [allow_empty_annotations]. [allow_empty_annotations] has no effect if source_format is
123
+ 'coco_camera_traps'.
124
+ create_image_and_label_folders (bool, optional): whether to create separate folders called 'images' and
125
+ 'labels' in the YOLO output folder. If create_image_and_label_folders is False,
126
+ a/b/c/image001.jpg will become a#b#c#image001.jpg, and the corresponding text file will
127
+ be a#b#c#image001.txt. If create_image_and_label_folders is True, a/b/c/image001.jpg will become
128
+ images/a#b#c#image001.jpg, and the corresponding text file will be
129
+ labels/a#b#c#image001.txt.
130
+ clip_boxes (bool, optional): whether to clip bounding box coordinates to the range [0,1] before
131
+ converting to YOLO xywh format
132
+ image_id_to_output_image_json_file (str, optional): an optional *output* file, to which we will write
133
+ a mapping from image IDs to output file names
134
+ images_to_exclude (list, optional): a list of image files (relative paths in the input folder) that we
135
+ should ignore
136
+ path_replacement_char (str, optional): only relevant if [flatten_paths] is True; this is used to replace
137
+ path separators, e.g. if [path_replacement_char] is '#' and [flatten_paths] is True, a/b/c/d.jpg
138
+ becomes a#b#c#d.jpg
139
+ category_names_to_exclude (str, optional): category names that should not be represented in the
140
+ YOLO output; only impacts annotations, does not prevent copying images. There's almost no reason
141
+ you would want to specify this and [category_names_to_include].
142
+ category_names_to_include (str, optional): allow-list of category names that should be represented in the
143
+ YOLO output; only impacts annotations, does not prevent copying images. There's almost no reason
144
+ you would want to specify this and [category_names_to_exclude].
145
+ write_output (bool, optional): determines whether we actually copy images and write annotations;
146
+ setting this to False mostly puts this function in "dry run" mode. The class list
147
+ file is written regardless of the value of write_output.
148
+
149
+ Returns:
150
+ dict: information about the coco --> yolo mapping, containing at least the fields:
151
+
152
+ - class_list_filename: the filename to which we wrote the flat list of class names required
153
+ by the YOLO format.
154
+ - source_image_to_dest_image: a dict mapping source images to destination images
155
+ - coco_id_to_yolo_id: a dict mapping COCO category IDs to YOLO category IDs
131
156
  """
132
157
 
133
158
  ## Validate input
@@ -189,7 +214,6 @@ def coco_to_yolo(input_image_folder,output_folder,input_file,
189
214
  coco_id_to_name = {}
190
215
  yolo_id_to_name = {}
191
216
  coco_category_ids_to_exclude = set()
192
- category_exclusion_warnings_printed = set()
193
217
 
194
218
  for category in data['categories']:
195
219
  coco_id_to_name[category['id']] = category['name']
@@ -465,9 +489,9 @@ def coco_to_yolo(input_image_folder,output_folder,input_file,
465
489
  #
466
490
  # https://github.com/ultralytics/yolov5/issues/3218
467
491
  #
468
- # I think this is also true for images with empty annotation files, but
469
- # I'm using the convention suggested on that issue, i.e. hard negatives
470
- # are expressed as images without .txt files.
492
+ # I think this is also true for images with empty .txt files, but
493
+ # I'm using the convention suggested on that issue, i.e. hard
494
+ # negatives are expressed as images without .txt files.
471
495
  if len(bboxes) > 0:
472
496
 
473
497
  with open(dest_txt,'w') as f:
@@ -497,12 +521,12 @@ def create_yolo_symlinks(source_folder,images_folder,labels_folder,
497
521
  class_list_output_name='object.data',
498
522
  force_lowercase_image_extension=False):
499
523
  """
500
- Given a YOLO-formatted folder of images and .txt files, create a folder
524
+ Given a YOLO-formatted folder of images and .txt files, creates a folder
501
525
  of symlinks to all the images, and a folder of symlinks to all the labels.
502
- Used to support preview/editing tools (like BoundingBoxEditor) that assume
503
- images and labels are in separate folders.
526
+ Used to support preview/editing tools that assume images and labels are in separate
527
+ folders.
504
528
 
505
- images_folder and labels_folder are absolute paths.
529
+ :meta private:
506
530
  """
507
531
 
508
532
  assert source_folder != images_folder and source_folder != labels_folder
@@ -616,7 +640,7 @@ def main():
616
640
  parser.add_argument(
617
641
  '--create_bounding_box_editor_symlinks',
618
642
  action='store_true',
619
- help='Prepare symlinks so the whole folder is BoundingBoxEditor-friendly')
643
+ help='Prepare symlinks so the whole folder appears to contain "images" and "labels" folderss')
620
644
 
621
645
  if len(sys.argv[1:]) == 0:
622
646
  parser.print_help()
File without changes
@@ -1,10 +1,12 @@
1
- ########
2
- #
3
- # add_width_and_height_to_db.py
4
- #
5
- # Grabs width and height from actual image files for a .json database that is missing w/h.
6
- #
7
- ########
1
+ """
2
+
3
+ add_width_and_height_to_db.py
4
+
5
+ Grabs width and height from actual image files for a .json database that is missing w/h.
6
+
7
+ TODO: this is a one-off script waiting to be cleaned up for more general use.
8
+
9
+ """
8
10
 
9
11
  #%% Imports and constants
10
12
 
@@ -14,16 +16,18 @@ from PIL import Image
14
16
  datafile = '/datadrive/snapshotserengeti/databases/snapshotserengeti.json'
15
17
  image_base = '/datadrive/snapshotserengeti/images/'
16
18
 
19
+ def main():
17
20
 
18
- #%% Execution
21
+ with open(datafile,'r') as f:
22
+ data = json.load(f)
19
23
 
20
- with open(datafile,'r') as f:
21
- data = json.load(f)
24
+ for im in data['images']:
25
+ if 'height' not in im:
26
+ im_w, im_h = Image.open(image_base+im['file_name']).size
27
+ im['height'] = im_h
28
+ im['width'] = im_w
22
29
 
23
- for im in data['images']:
24
- if 'height' not in im:
25
- im_w, im_h = Image.open(image_base+im['file_name']).size
26
- im['height'] = im_h
27
- im['width'] = im_w
30
+ json.dump(data, open(datafile,'w'))
28
31
 
29
- json.dump(data, open(datafile,'w'))
32
+ if __name__ == '__main__':
33
+ main()
@@ -1,17 +1,19 @@
1
- ########
2
- #
3
- # combine_coco_camera_traps_files.py
4
- #
5
- # Merges two or more .json files in COCO Camera Traps format, optionally
6
- # writing the results to another .json file.
7
- #
8
- # - Concatenates image lists, erroring if images are not unique.
9
- # - Errors on unrecognized fields.
10
- # - Checks compatibility in info structs, within reason.
11
- #
12
- # combine_coco_camera_traps_files input1.json input2.json ... inputN.json output.json
13
- #
14
- ########
1
+ """
2
+
3
+ combine_coco_camera_traps_files.py
4
+
5
+ Merges two or more .json files in COCO Camera Traps format, optionally
6
+ writing the results to another .json file.
7
+
8
+ - Concatenates image lists, erroring if images are not unique.
9
+ - Errors on unrecognized fields.
10
+ - Checks compatibility in info structs, within reason.
11
+
12
+ *Example command-line invocation*
13
+
14
+ combine_coco_camera_traps_files input1.json input2.json ... inputN.json output.json
15
+
16
+ """
15
17
 
16
18
  #%% Constants and imports
17
19
 
@@ -19,26 +21,25 @@ import argparse
19
21
  import json
20
22
  import sys
21
23
 
22
- from typing import Any, Dict, Iterable, Mapping, List, Optional
23
-
24
24
 
25
25
  #%% Merge functions
26
26
 
27
- def combine_cct_files(input_files: List[str],
28
- output_file: Optional[str] = None,
29
- require_uniqueness: Optional[bool] = True,
30
- filename_prefixes: Optional[dict] = None
31
- ) -> Dict[str, Any]:
27
+ def combine_cct_files(input_files, output_file=None, require_uniqueness=True,
28
+ filename_prefixes=None):
32
29
  """
33
- Merges list of COCO Camera Traps files *input_files* into a single
34
- dictionary, optionally writing the result to *output_file*.
30
+ Merges the list of COCO Camera Traps files [input_files] into a single
31
+ dictionary, optionally writing the result to [output_file].
35
32
 
36
33
  Args:
37
- input_files: list of str, paths to JSON detection files
38
- output_file: optional str, path to write merged JSON
39
- require_uniqueness: bool, whether to require that the images in
34
+ input_files (list): paths to CCT .json files
35
+ output_file (str, optional): path to write merged .json file
36
+ require_uniqueness (bool): whether to require that the images in
40
37
  each input_dict be unique
38
+
39
+ Returns:
40
+ dict: the merged COCO-formatted .json dict
41
41
  """
42
+
42
43
  input_dicts = []
43
44
  print('Loading input files')
44
45
  for fn in input_files:
@@ -61,11 +62,9 @@ def combine_cct_files(input_files: List[str],
61
62
  return merged_dict
62
63
 
63
64
 
64
- def combine_cct_dictionaries(input_dicts: Iterable[Mapping[str, Any]],
65
- require_uniqueness: Optional[bool] = True
66
- ) -> Dict[str, Any]:
65
+ def combine_cct_dictionaries(input_dicts, require_uniqueness=True):
67
66
  """
68
- Merges the list of COCO Camera Traps dictionaries *input_dicts*. See header
67
+ Merges the list of COCO Camera Traps dictionaries [input_dicts]. See module header
69
68
  comment for details on merge rules.
70
69
 
71
70
  Args:
@@ -73,7 +72,8 @@ def combine_cct_dictionaries(input_dicts: Iterable[Mapping[str, Any]],
73
72
  require_uniqueness: bool, whether to require that the images in
74
73
  each input_dict be unique
75
74
 
76
- Returns: dict, represents the merged JSON
75
+ Returns:
76
+ dict: the merged COCO-formatted .json dict
77
77
  """
78
78
 
79
79
  filename_to_image = {}
@@ -177,12 +177,16 @@ def combine_cct_dictionaries(input_dicts: Iterable[Mapping[str, Any]],
177
177
  'categories': all_categories,
178
178
  'images': sorted_images,
179
179
  'annotations': all_annotations}
180
+
180
181
  return merged_dict
181
182
 
183
+ # ...combine_cct_dictionaries(...)
184
+
182
185
 
183
186
  #%% Command-line driver
184
187
 
185
188
  def main():
189
+
186
190
  parser = argparse.ArgumentParser()
187
191
  parser.add_argument(
188
192
  'input_paths', nargs='+',