megadetector 5.0.7__py3-none-any.whl → 5.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of megadetector might be problematic.

Files changed (191)
  1. api/__init__.py +0 -0
  2. api/batch_processing/__init__.py +0 -0
  3. api/batch_processing/api_core/__init__.py +0 -0
  4. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. api/batch_processing/api_core/batch_service/score.py +0 -1
  6. api/batch_processing/api_core/server_job_status_table.py +0 -1
  7. api/batch_processing/api_core_support/__init__.py +0 -0
  8. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
  9. api/batch_processing/api_support/__init__.py +0 -0
  10. api/batch_processing/api_support/summarize_daily_activity.py +0 -1
  11. api/batch_processing/data_preparation/__init__.py +0 -0
  12. api/batch_processing/data_preparation/manage_local_batch.py +93 -79
  13. api/batch_processing/data_preparation/manage_video_batch.py +8 -8
  14. api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
  15. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
  16. api/batch_processing/postprocessing/__init__.py +0 -0
  17. api/batch_processing/postprocessing/add_max_conf.py +12 -12
  18. api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
  19. api/batch_processing/postprocessing/combine_api_outputs.py +69 -55
  20. api/batch_processing/postprocessing/compare_batch_results.py +114 -44
  21. api/batch_processing/postprocessing/convert_output_format.py +62 -19
  22. api/batch_processing/postprocessing/load_api_results.py +17 -20
  23. api/batch_processing/postprocessing/md_to_coco.py +31 -21
  24. api/batch_processing/postprocessing/md_to_labelme.py +165 -68
  25. api/batch_processing/postprocessing/merge_detections.py +40 -15
  26. api/batch_processing/postprocessing/postprocess_batch_results.py +270 -186
  27. api/batch_processing/postprocessing/remap_detection_categories.py +170 -0
  28. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +75 -39
  29. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
  30. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
  31. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +244 -160
  32. api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
  33. api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
  34. api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
  35. api/synchronous/__init__.py +0 -0
  36. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  37. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
  38. api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
  39. api/synchronous/api_core/animal_detection_api/config.py +35 -35
  40. api/synchronous/api_core/tests/__init__.py +0 -0
  41. api/synchronous/api_core/tests/load_test.py +109 -109
  42. classification/__init__.py +0 -0
  43. classification/aggregate_classifier_probs.py +21 -24
  44. classification/analyze_failed_images.py +11 -13
  45. classification/cache_batchapi_outputs.py +51 -51
  46. classification/create_classification_dataset.py +69 -68
  47. classification/crop_detections.py +54 -53
  48. classification/csv_to_json.py +97 -100
  49. classification/detect_and_crop.py +105 -105
  50. classification/evaluate_model.py +43 -42
  51. classification/identify_mislabeled_candidates.py +47 -46
  52. classification/json_to_azcopy_list.py +10 -10
  53. classification/json_validator.py +72 -71
  54. classification/map_classification_categories.py +44 -43
  55. classification/merge_classification_detection_output.py +68 -68
  56. classification/prepare_classification_script.py +157 -154
  57. classification/prepare_classification_script_mc.py +228 -228
  58. classification/run_classifier.py +27 -26
  59. classification/save_mislabeled.py +30 -30
  60. classification/train_classifier.py +20 -20
  61. classification/train_classifier_tf.py +21 -22
  62. classification/train_utils.py +10 -10
  63. data_management/__init__.py +0 -0
  64. data_management/annotations/__init__.py +0 -0
  65. data_management/annotations/annotation_constants.py +18 -31
  66. data_management/camtrap_dp_to_coco.py +238 -0
  67. data_management/cct_json_utils.py +107 -59
  68. data_management/cct_to_md.py +176 -158
  69. data_management/cct_to_wi.py +247 -219
  70. data_management/coco_to_labelme.py +272 -0
  71. data_management/coco_to_yolo.py +86 -62
  72. data_management/databases/__init__.py +0 -0
  73. data_management/databases/add_width_and_height_to_db.py +20 -16
  74. data_management/databases/combine_coco_camera_traps_files.py +35 -31
  75. data_management/databases/integrity_check_json_db.py +130 -83
  76. data_management/databases/subset_json_db.py +25 -16
  77. data_management/generate_crops_from_cct.py +27 -45
  78. data_management/get_image_sizes.py +188 -144
  79. data_management/importers/add_nacti_sizes.py +8 -8
  80. data_management/importers/add_timestamps_to_icct.py +78 -78
  81. data_management/importers/animl_results_to_md_results.py +158 -160
  82. data_management/importers/auckland_doc_test_to_json.py +9 -9
  83. data_management/importers/auckland_doc_to_json.py +8 -8
  84. data_management/importers/awc_to_json.py +7 -7
  85. data_management/importers/bellevue_to_json.py +15 -15
  86. data_management/importers/cacophony-thermal-importer.py +13 -13
  87. data_management/importers/carrizo_shrubfree_2018.py +8 -8
  88. data_management/importers/carrizo_trail_cam_2017.py +8 -8
  89. data_management/importers/cct_field_adjustments.py +9 -9
  90. data_management/importers/channel_islands_to_cct.py +10 -10
  91. data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
  92. data_management/importers/ena24_to_json.py +7 -7
  93. data_management/importers/filenames_to_json.py +8 -8
  94. data_management/importers/helena_to_cct.py +7 -7
  95. data_management/importers/idaho-camera-traps.py +7 -7
  96. data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
  97. data_management/importers/jb_csv_to_json.py +9 -9
  98. data_management/importers/mcgill_to_json.py +8 -8
  99. data_management/importers/missouri_to_json.py +18 -18
  100. data_management/importers/nacti_fieldname_adjustments.py +10 -10
  101. data_management/importers/noaa_seals_2019.py +8 -8
  102. data_management/importers/pc_to_json.py +7 -7
  103. data_management/importers/plot_wni_giraffes.py +7 -7
  104. data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
  105. data_management/importers/prepare_zsl_imerit.py +7 -7
  106. data_management/importers/rspb_to_json.py +8 -8
  107. data_management/importers/save_the_elephants_survey_A.py +8 -8
  108. data_management/importers/save_the_elephants_survey_B.py +9 -9
  109. data_management/importers/snapshot_safari_importer.py +26 -26
  110. data_management/importers/snapshot_safari_importer_reprise.py +665 -665
  111. data_management/importers/snapshot_serengeti_lila.py +14 -14
  112. data_management/importers/sulross_get_exif.py +8 -9
  113. data_management/importers/timelapse_csv_set_to_json.py +11 -11
  114. data_management/importers/ubc_to_json.py +13 -13
  115. data_management/importers/umn_to_json.py +7 -7
  116. data_management/importers/wellington_to_json.py +8 -8
  117. data_management/importers/wi_to_json.py +9 -9
  118. data_management/importers/zamba_results_to_md_results.py +181 -181
  119. data_management/labelme_to_coco.py +309 -159
  120. data_management/labelme_to_yolo.py +103 -60
  121. data_management/lila/__init__.py +0 -0
  122. data_management/lila/add_locations_to_island_camera_traps.py +9 -9
  123. data_management/lila/add_locations_to_nacti.py +147 -147
  124. data_management/lila/create_lila_blank_set.py +114 -31
  125. data_management/lila/create_lila_test_set.py +8 -8
  126. data_management/lila/create_links_to_md_results_files.py +106 -106
  127. data_management/lila/download_lila_subset.py +92 -90
  128. data_management/lila/generate_lila_per_image_labels.py +56 -43
  129. data_management/lila/get_lila_annotation_counts.py +18 -15
  130. data_management/lila/get_lila_image_counts.py +11 -11
  131. data_management/lila/lila_common.py +103 -70
  132. data_management/lila/test_lila_metadata_urls.py +132 -116
  133. data_management/ocr_tools.py +173 -128
  134. data_management/read_exif.py +161 -99
  135. data_management/remap_coco_categories.py +84 -0
  136. data_management/remove_exif.py +58 -62
  137. data_management/resize_coco_dataset.py +32 -44
  138. data_management/wi_download_csv_to_coco.py +246 -0
  139. data_management/yolo_output_to_md_output.py +86 -73
  140. data_management/yolo_to_coco.py +535 -95
  141. detection/__init__.py +0 -0
  142. detection/detector_training/__init__.py +0 -0
  143. detection/process_video.py +85 -33
  144. detection/pytorch_detector.py +43 -25
  145. detection/run_detector.py +157 -72
  146. detection/run_detector_batch.py +189 -114
  147. detection/run_inference_with_yolov5_val.py +118 -51
  148. detection/run_tiled_inference.py +113 -42
  149. detection/tf_detector.py +51 -28
  150. detection/video_utils.py +606 -521
  151. docs/source/conf.py +43 -0
  152. md_utils/__init__.py +0 -0
  153. md_utils/azure_utils.py +9 -9
  154. md_utils/ct_utils.py +249 -70
  155. md_utils/directory_listing.py +59 -64
  156. md_utils/md_tests.py +968 -862
  157. md_utils/path_utils.py +655 -155
  158. md_utils/process_utils.py +157 -133
  159. md_utils/sas_blob_utils.py +20 -20
  160. md_utils/split_locations_into_train_val.py +45 -32
  161. md_utils/string_utils.py +33 -10
  162. md_utils/url_utils.py +208 -27
  163. md_utils/write_html_image_list.py +51 -35
  164. md_visualization/__init__.py +0 -0
  165. md_visualization/plot_utils.py +102 -109
  166. md_visualization/render_images_with_thumbnails.py +34 -34
  167. md_visualization/visualization_utils.py +908 -311
  168. md_visualization/visualize_db.py +109 -58
  169. md_visualization/visualize_detector_output.py +61 -42
  170. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/METADATA +21 -17
  171. megadetector-5.0.9.dist-info/RECORD +224 -0
  172. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/WHEEL +1 -1
  173. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
  174. taxonomy_mapping/__init__.py +0 -0
  175. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
  176. taxonomy_mapping/map_new_lila_datasets.py +154 -154
  177. taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
  178. taxonomy_mapping/preview_lila_taxonomy.py +591 -591
  179. taxonomy_mapping/retrieve_sample_image.py +12 -12
  180. taxonomy_mapping/simple_image_download.py +11 -11
  181. taxonomy_mapping/species_lookup.py +10 -10
  182. taxonomy_mapping/taxonomy_csv_checker.py +18 -18
  183. taxonomy_mapping/taxonomy_graph.py +47 -47
  184. taxonomy_mapping/validate_lila_category_mappings.py +83 -76
  185. data_management/cct_json_to_filename_json.py +0 -89
  186. data_management/cct_to_csv.py +0 -140
  187. data_management/databases/remove_corrupted_images_from_db.py +0 -191
  188. detection/detector_training/copy_checkpoints.py +0 -43
  189. md_visualization/visualize_megadb.py +0 -183
  190. megadetector-5.0.7.dist-info/RECORD +0 -202
  191. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0

data_management/resize_coco_dataset.py

@@ -1,11 +1,11 @@
- ########
- #
- # resize_coco_dataset.py
- #
- # Given a COCO-formatted dataset, resize all the images to a target size,
- # scaling bounding boxes accordingly.
- #
- ########
+ """
+
+ resize_coco_dataset.py
+
+ Given a COCO-formatted dataset, resizes all the images to a target size,
+ scaling bounding boxes accordingly.
+
+ """

  #%% Imports and constants

@@ -26,31 +26,31 @@ from md_visualization.visualization_utils import \
  def resize_coco_dataset(input_folder,input_filename,
                          output_folder,output_filename,
                          target_size=(-1,-1),
-                         correct_size_image_handling='copy',
-                         right_edge_quantization_threshold=None):
+                         correct_size_image_handling='copy'):
      """
-     Given a COCO-formatted dataset (images in input_folder, data in input_filename), resize
-     all the images to a target size (in output_folder) and scale bounding boxes accordingly
-     (in output_filename).
-
-     target_size should be a tuple/list of ints, length 2. If either dimension is -1, aspect ratio
-     will be preserved. If both dimensions are -1, this means "keep the original size". If
-     both dimensions are -1 and correct_size_image_handling is copy, this function is basically
-     a no-op, although you might still use it for right_edge_quantization_threshold.
-
-     correct_size_image_handling can be 'copy' (in which case the original image is just copied
-     to the output folder) or 'rewrite' (in which case the image is opened via PIL and re-written,
-     attempting to preserve the same quality). The only reason to do this is the case where
-     you're superstitious about biases coming from images in a training set being written
-     by different image encoders.
+     Given a COCO-formatted dataset (images in input_folder, data in input_filename), resizes
+     all the images to a target size (in output_folder) and scales bounding boxes accordingly.
+
+     Args:
+         input_folder (str): the folder where images live; filenames in [input_filename] should
+             be relative to [input_folder]
+         input_filename (str): the (input) COCO-formatted .json file containing annotations
+         output_folder (str): the folder to which we should write resized images; can be the
+             same as [input_folder], in which case images are over-written
+         output_filename (str): the COCO-formatted .json file we should generate that refers to
+             the resized images
+         target_size (list or tuple of ints): this should be tuple/list of ints, with length 2 (w,h).
+             If either dimension is -1, aspect ratio will be preserved. If both dimensions are -1, this means
+             "keep the original size". If both dimensions are -1 and correct_size_image_handling is copy, this
+             function is basically a no-op.
+         correct_size_image_handling (str): can be 'copy' (in which case the original image is just copied
+             to the output folder) or 'rewrite' (in which case the image is opened via PIL and re-written,
+             attempting to preserve the same quality). The only reason to use 'rewrite' is the case where
+             you're superstitious about biases coming from images in a training set being written by different
+             image encoders.

-     right_edge_quantization_threshold is an off-by-default hack to adjust large datasets where
-     boxes that really should be running off the right side of the image only extend like 99%
-     of the way there, due to what appears to be a slight bias inherent to MD. If a box extends
-     within [right_edge_quantization_threshold] (a small number, from 0 to 1, but probably around
-     0.02) of the right edge of the image, it will be extended to the far right edge.
-
-     Returns the COCO database with resized images.
+     Returns:
+         dict: the COCO database with resized images, identical to the content of [output_filename]
      """

      # Read input data
@@ -126,15 +126,6 @@ def resize_coco_dataset(input_folder,input_filename,
                      bbox[2] * width_scale,
                      bbox[3] * height_scale]

-             # Do we need to quantize this box?
-             if right_edge_quantization_threshold is not None and \
-                 right_edge_quantization_threshold > 0:
-                 bbox_right_edge_abs = bbox[0] + bbox[2]
-                 bbox_right_edge_norm = bbox_right_edge_abs / output_w
-                 bbox_right_edge_distance = (1.0 - bbox_right_edge_norm)
-                 if bbox_right_edge_distance < right_edge_quantization_threshold:
-                     bbox[2] = output_w - bbox[0]
-
              ann['bbox'] = bbox

          # ...if this annotation has a box
@@ -169,13 +160,10 @@ if False:

      correct_size_image_handling = 'rewrite'

-     right_edge_quantization_threshold = 0.015
-
      resize_coco_dataset(input_folder,input_filename,
                          output_folder,output_filename,
                          target_size=target_size,
-                         correct_size_image_handling=correct_size_image_handling,
-                         right_edge_quantization_threshold=right_edge_quantization_threshold)
+                         correct_size_image_handling=correct_size_image_handling)


      #%% Preview
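
The 5.0.9 signature above drops the right_edge_quantization_threshold parameter that existed in 5.0.7, so any caller still passing it needs to be updated. A minimal sketch of calling the new signature follows; all folder and file names are hypothetical placeholders, not files from this package:

    from data_management.resize_coco_dataset import resize_coco_dataset

    # Resize to a width of 1600, preserving aspect ratio (-1 for height).
    # With target_size=(-1,-1) and correct_size_image_handling='copy', the call
    # would effectively be a no-op, per the docstring above.
    coco_data = resize_coco_dataset(input_folder='my-dataset/images',
                                    input_filename='my-dataset/train.json',
                                    output_folder='my-dataset/images-resized',
                                    output_filename='my-dataset/train-resized.json',
                                    target_size=(1600,-1),
                                    correct_size_image_handling='rewrite')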

data_management/wi_download_csv_to_coco.py (new file)

@@ -0,0 +1,246 @@
+ """
+
+ wi_download_csv_to_coco.py
+
+ Converts a .csv file from a Wildlife Insights project export to a COCO camera traps .json file.
+
+ Currently assumes that common names are unique identifiers, which is convenient but unreliable.
+
+ """
+
+ #%% Imports and constants
+
+ import os
+ import json
+ import pandas as pd
+
+ from tqdm import tqdm
+ from collections import defaultdict
+
+ from md_visualization import visualization_utils as vis_utils
+ from md_utils.ct_utils import isnan
+
+ wi_extra_annotation_columns = \
+     ('is_blank','identified_by','wi_taxon_id','class','order','family','genus','species','uncertainty',
+      'number_of_objects','age','sex','animal_recognizable','individual_id','individual_animal_notes',
+      'behavior','highlighted','markings')
+
+ wi_extra_image_columns = ('project_id','deployment_id')
+
+ def _make_location_id(project_id,deployment_id):
+     return 'project_' + str(project_id) + '_deployment_' + deployment_id
+
+ default_category_remappings = {
+     'Homo Species':'Human',
+     'Human-Camera Trapper':'Human',
+     'No CV Result':'Unknown'
+ }
+
+
+ #%% Main function
+
+ def wi_download_csv_to_coco(csv_file_in,
+                             coco_file_out=None,
+                             image_folder=None,
+                             validate_images=False,
+                             gs_prefix=None,
+                             verbose=True,
+                             category_remappings=default_category_remappings):
+     """
+     Converts a .csv file from a Wildlife Insights project export to a COCO
+     Camera Traps .json file.
+
+     Args:
+         csv_file_in (str): the downloaded .csv file we should convert to COCO
+         coco_file_out (str, optional): the .json file we should write; if [coco_file_out] is None,
+             uses [csv_file_in].json
+         image_folder (str, optional): the folder where images live, only relevant if
+             [validate_images] is True
+         validate_images (bool, optional): whether to check images for corruption and load
+             image sizes; if this is True, [image_folder] must be a valid folder
+         gs_prefix (str, optional): a string to remove from GS URLs to convert to path names...
+             for example, if your gs:// URLs look like:
+
+             `gs://11234134_xyz/deployment/55554/dfadfasdfs.jpg`
+
+             ...and you specify gs_prefix='11234134_xyz/deployment/', the filenames in
+             the .json file will look like:
+
+             `55554/dfadfasdfs.jpg`
+         verbose (bool, optional): enable additional debug console output
+         category_remappings (dict, optional): str --> str dict that maps any number of
+             WI category names to output category names; for example defaults to mapping
+             "Homo Species" to "Human", but leaves 99.99% of categories unchanged.
+
+     Returns:
+         dict: COCO-formatted data, identical to what's written to [coco_file_out]
+     """
+
+     ##%% Create COCO dictionaries
+
+     category_name_to_id = {}
+     category_name_to_id['empty'] = 0
+
+     df = pd.read_csv(csv_file_in)
+
+     print('Read {} rows from {}'.format(len(df),csv_file_in))
+
+     image_id_to_image = {}
+     image_id_to_annotations = defaultdict(list)
+
+     # i_row = 0; row = df.iloc[i_row]
+     for i_row,row in df.iterrows():
+
+         image_id = row['image_id']
+
+         if image_id not in image_id_to_image:
+
+             im = {}
+             image_id_to_image[image_id] = im
+
+             im['id'] = image_id
+
+             gs_url = row['location']
+             assert gs_url.startswith('gs://')
+
+             file_name = gs_url.replace('gs://','')
+             if gs_prefix is not None:
+                 file_name = file_name.replace(gs_prefix,'')
+
+             location_id = _make_location_id(row['project_id'],row['deployment_id'])
+             im['file_name'] = file_name
+             im['location'] = location_id
+             im['datetime'] = row['timestamp']
+
+             im['wi_image_info'] = {}
+             for s in wi_extra_image_columns:
+                 im['wi_image_info'][s] = str(row[s])
+
+         else:
+
+             im = image_id_to_image[image_id]
+             assert im['datetime'] == row['timestamp']
+             location_id = _make_location_id(row['project_id'],row['deployment_id'])
+             assert im['location'] == location_id
+
+         category_name = row['common_name']
+         if category_remappings is not None and category_name in category_remappings:
+             category_name = category_remappings[category_name]
+
+         if category_name == 'Blank':
+             category_name = 'empty'
+             assert row['is_blank'] == 1
+         else:
+             assert row['is_blank'] == 0
+         assert isinstance(category_name,str)
+         if category_name in category_name_to_id:
+             category_id = category_name_to_id[category_name]
+         else:
+             category_id = len(category_name_to_id)
+             category_name_to_id[category_name] = category_id
+
+         ann = {}
+         ann['image_id'] = image_id
+         annotations_this_image = image_id_to_annotations[image_id]
+         annotation_number = len(annotations_this_image)
+         ann['id'] = image_id + '_' + str(annotation_number).zfill(2)
+         ann['category_id'] = category_id
+         annotations_this_image.append(ann)
+
+         extra_info = {}
+         for s in wi_extra_annotation_columns:
+             v = row[s]
+             if not isnan(v):
+                 extra_info[s] = v
+         ann['wi_extra_info'] = extra_info
+
+     # ...for each row
+
+     images = list(image_id_to_image.values())
+     categories = []
+     for category_name in category_name_to_id:
+         category_id = category_name_to_id[category_name]
+         categories.append({'id':category_id,'name':category_name})
+     annotations = []
+     for image_id in image_id_to_annotations:
+         annotations_this_image = image_id_to_annotations[image_id]
+         for ann in annotations_this_image:
+             annotations.append(ann)
+     info = {'version':'1.00','description':'converted from WI export'}
+     info['source_file'] = csv_file_in
+     coco_data = {}
+     coco_data['info'] = info
+     coco_data['images'] = images
+     coco_data['annotations'] = annotations
+     coco_data['categories'] = categories
+
+
+     ##%% Validate images, add sizes
+
+     if validate_images:
+
+         print('Validating images')
+         # TODO: trivially parallelizable
+
+         assert os.path.isdir(image_folder), \
+             'Must specify a valid image folder if you specify validate_images=True'
+
+         # im = images[0]
+         for im in tqdm(images):
+             file_name_relative = im['file_name']
+             file_name_abs = os.path.join(image_folder,file_name_relative)
+             assert os.path.isfile(file_name_abs)
+
+             im['corrupt'] = False
+             try:
+                 pil_im = vis_utils.load_image(file_name_abs)
+             except Exception:
+                 im['corrupt'] = True
+             if not im['corrupt']:
+                 im['width'] = pil_im.width
+                 im['height'] = pil_im.height
+
+
+     ##%% Write output json
+
+     if coco_file_out is None:
+         coco_file_out = csv_file_in + '.json'
+
+     with open(coco_file_out,'w') as f:
+         json.dump(coco_data,f,indent=1)
+
+
+     ##%% Validate output
+
+     from data_management.databases.integrity_check_json_db import \
+         IntegrityCheckOptions,integrity_check_json_db
+     options = IntegrityCheckOptions()
+     options.baseDir = image_folder
+     options.bCheckImageExistence = True
+     options.verbose = verbose
+     _ = integrity_check_json_db(coco_file_out,options)
+
+     return coco_data
+
+ # ...def wi_download_csv_to_coco(...)
+
+
+ #%% Interactive driver
+
+ if False:
+
+     #%%
+
+     base_folder = r'a/b/c'
+     csv_file_in = os.path.join(base_folder,'images.csv')
+     coco_file_out = None
+     gs_prefix = 'a_b_c_main/'
+     image_folder = os.path.join(base_folder,'images')
+     validate_images = False
+     verbose = True
+     category_remappings = default_category_remappings
+
+
+ #%% Command-line driver
+
+ # TODO
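
The module's interactive driver above sets up parameters but stops short of the actual call, and the command-line driver is still a TODO, so here is a minimal usage sketch; all paths and the gs_prefix value are hypothetical:

    from data_management.wi_download_csv_to_coco import wi_download_csv_to_coco

    # Convert a Wildlife Insights project export to a COCO camera traps .json file
    coco_data = wi_download_csv_to_coco(csv_file_in='wi-project/images.csv',
                                        coco_file_out=None,    # defaults to images.csv.json
                                        image_folder='wi-project/images',
                                        validate_images=False,
                                        gs_prefix='wi-project-main/',
                                        verbose=True)

Note that, as written in this diff, the final "Validate output" step always runs integrity_check_json_db with bCheckImageExistence=True against [image_folder], so supplying a real image folder is advisable even when validate_images is False.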

data_management/yolo_output_to_md_output.py

@@ -1,43 +1,38 @@
- ########
- #
- # yolo_output_to_md_output.py
- #
- # Converts the output of YOLOv5's detect.py or val.py to the MD API output format.
- #
- # Command-line driver not done yet, this has only been run interactively.
- #
- ########
-
- ### Converting .txt files ###
-
- #
- # detect.py writes a .txt file per image, in YOLO training format. Converting from this
- # format does not currently support recursive results, since detect.py doesn't save filenames
- # in a way that allows easy inference of folder names. Requires access to the input
- # images, because the YOLO format uses the *absence* of a results file to indicate that
- # no detections are present.
- #
- # YOLOv5 output has one text file per image, like so:
- #
- # 0 0.0141693 0.469758 0.0283385 0.131552 0.761428
- #
- # That's [class, x_center, y_center, width_of_box, height_of_box, confidence]
- #
- # val.py can write in this format as well, using the --save-txt argument.
- #
- # In both cases, a confidence value is only written to each line if you include the --save-conf
- # argument. Confidence values are required by this conversion script.
- #
-
- ### Converting .json files ###
-
- #
- # val.py can also write a .json file in COCO-ish format. It's "COCO-ish" because it's
- # just the "images" portion of a COCO .json file.
- #
- # Converting from this format also requires access to the original images, since the format
- # written by YOLOv5 uses absolute coordinates, but MD results are in relative coordinates.
- #
+ """
+
+ yolo_output_to_md_output.py
+
+ Converts the output of YOLOv5's detect.py or val.py to the MD API output format.
+
+ **Converting .txt files**
+
+ detect.py writes a .txt file per image, in YOLO training format. Converting from this
+ format does not currently support recursive results, since detect.py doesn't save filenames
+ in a way that allows easy inference of folder names. Requires access to the input
+ images, because the YOLO format uses the *absence* of a results file to indicate that
+ no detections are present.
+
+ YOLOv5 output has one text file per image, like so:
+
+ 0 0.0141693 0.469758 0.0283385 0.131552 0.761428
+
+ That's [class, x_center, y_center, width_of_box, height_of_box, confidence]
+
+ val.py can write in this format as well, using the --save-txt argument.
+
+ In both cases, a confidence value is only written to each line if you include the --save-conf
+ argument. Confidence values are required by this conversion script.
+
+
+ **Converting .json files**
+
+ val.py can also write a .json file in COCO-ish format. It's "COCO-ish" because it's
+ just the "images" portion of a COCO .json file.
+
+ Converting from this format also requires access to the original images, since the format
+ written by YOLOv5 uses absolute coordinates, but MD results are in relative coordinates.
+
+ """

  #%% Imports and constants

@@ -51,9 +46,7 @@ from tqdm import tqdm

  from md_utils import path_utils
  from md_utils import ct_utils
-
  from md_visualization import visualization_utils as vis_utils
-
  from detection.run_detector import CONF_DIGITS, COORD_DIGITS


@@ -61,9 +54,16 @@ from detection.run_detector import CONF_DIGITS, COORD_DIGITS

  def read_classes_from_yolo_dataset_file(fn):
      """
-     Read a dictionary mapping integer class IDs to class names from a YOLOv5/YOLOv8
+     Reads a dictionary mapping integer class IDs to class names from a YOLOv5/YOLOv8
      dataset.yaml file or a .json file. A .json file should contain a dictionary mapping
      integer category IDs to string category names.
+
+     Args:
+         fn (str): YOLOv5/YOLOv8 dataset file with a .yml or .yaml extension, or a .json file
+             mapping integer category IDs to category names.
+
+     Returns:
+         dict: a mapping from integer category IDs to category names
      """

      if fn.endswith('.yml') or fn.endswith('.yaml'):
@@ -92,45 +92,42 @@ def read_classes_from_yolo_dataset_file(fn):
          raise ValueError('Unrecognized category file type: {}'.format(fn))

      assert len(category_id_to_name) > 0, 'Failed to read class mappings from {}'.format(fn)
+
      return category_id_to_name


- def yolo_json_output_to_md_output(yolo_json_file, image_folder,
-                                   output_file, yolo_category_id_to_name,
+ def yolo_json_output_to_md_output(yolo_json_file,
+                                   image_folder,
+                                   output_file,
+                                   yolo_category_id_to_name,
                                    detector_name='unknown',
                                    image_id_to_relative_path=None,
                                    offset_yolo_class_ids=True,
                                    truncate_to_standard_md_precision=True,
                                    image_id_to_error=None):
      """
-     Convert a YOLOv5 .json file to MD .json format.
+     Converts a YOLOv5/YOLOv8 .json file to MD .json format.

      Args:

-     - yolo_json_file: the .json file to convert from YOLOv5 format to MD output format.
-
-     - image_folder: the .json file contains relative path names, this is the path base.
-
-     - yolo_category_id_to_name: the .json file contains only numeric identifiers for
-       categories, but we want names and numbers for the output format; this is a
-       dict mapping numbers to names. Can also be a YOLOv5 dataset.yaml file.
-
-     - detector_name: a string that gets put in the output file, not otherwise used within
-       this function.
-
-     - image_id_to_relative_path: YOLOv5 .json uses only basenames (e.g. abc1234.JPG);
-       by default these will be appended to the input path to create pathnames, so if you
-       have a flat folder, this is fine. If you want to map base names to relative paths, use
-       this dict.
-
-     - offset_yolo_class_ids: YOLOv5 class IDs always start at zero; if you want to make the
-       output classes start at 1, set offset_yolo_class_ids to True.
-
-     - truncate_to_standard_md_precision: YOLOv5 .json includes lots of (not-super-meaningful)
-       precision, set this to truncate to COORD_DIGITS and CONF_DIGITS.
-
-     - image_id_to_error: if you want to include image IDs in the output file for which you couldn't
-       prepare the input file in the first place due to errors, include them here.
+         yolo_json_file (str): the .json file to convert from YOLOv5 format to MD output format
+         image_folder (str): the .json file contains relative path names, this is the path base
+         yolo_category_id_to_name (str or dict): the .json results file contains only numeric
+             identifiers for categories, but we want names and numbers for the output format;
+             yolo_category_id_to_name provides that mapping either as a dict or as a YOLOv5
+             dataset.yaml file.
+         detector_name (str, optional): a string that gets put in the output file, not otherwise
+             used within this function
+         image_id_to_relative_path (dict, optional): YOLOv5 .json uses only basenames (e.g.
+             abc1234.JPG); by default these will be appended to the input path to create pathnames.
+             If you have a flat folder, this is fine. If you want to map base names to relative paths in
+             a more complicated way, use this parameter.
+         offset_yolo_class_ids (bool, optional): YOLOv5 class IDs always start at zero; if you want to
+             make the output classes start at 1, set offset_yolo_class_ids to True.
+         truncate_to_standard_md_precision (bool, optional): YOLOv5 .json includes lots of
+             (not-super-meaningful) precision, set this to truncate to COORD_DIGITS and CONF_DIGITS.
+         image_id_to_error (dict, optional): if you want to include image IDs in the output file for which
+             you couldn't prepare the input file in the first place due to errors, include them here.
      """

      assert os.path.isfile(yolo_json_file), \
@@ -314,14 +311,25 @@ def yolo_json_output_to_md_output(yolo_json_file, image_folder,
  # ...def yolo_json_output_to_md_output(...)


- def yolo_txt_output_to_md_output(input_results_folder, image_folder,
-                                  output_file, detector_tag=None):
+ def yolo_txt_output_to_md_output(input_results_folder,
+                                  image_folder,
+                                  output_file,
+                                  detector_tag=None):
      """
-     Converts a folder of YOLO-outptu .txt files to MD .json format.
+     Converts a folder of YOLO-output .txt files to MD .json format.

      Less finished than the .json conversion function; this .txt conversion assumes
      a hard-coded mapping representing the standard MD categories (in MD indexing,
      1/2/3=animal/person/vehicle; in YOLO indexing, 0/1/2=animal/person/vehicle).
+
+     Args:
+         input_results_folder (str): the folder containing YOLO-output .txt files
+         image_folder (str): the folder where images live, may be the same as
+             [input_results_folder]
+         output_file (str): the MD-formatted .json file to which we should write
+             results
+         detector_tag (str, optional): string to put in the 'detector' field in the
+             output file
      """

      assert os.path.isdir(input_results_folder)
@@ -426,3 +434,8 @@ if False:
      image_folder = os.path.expanduser('~/data/KRU-test')
      output_file = os.path.expanduser('~/data/mdv5a-yolo-pt-kru.json')
      yolo_txt_output_to_md_output(input_results_folder,image_folder,output_file)
+
+
+ #%% Command-line driver
+
+ # TODO
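
Putting read_classes_from_yolo_dataset_file and yolo_json_output_to_md_output together, a minimal sketch of converting a YOLOv5 val.py results file to MD output format; dataset.yaml and all paths below are hypothetical:

    from data_management.yolo_output_to_md_output import (
        read_classes_from_yolo_dataset_file,
        yolo_json_output_to_md_output)

    # Map integer YOLO class IDs to names from the training dataset.yaml
    category_id_to_name = read_classes_from_yolo_dataset_file('yolo-run/dataset.yaml')

    yolo_json_output_to_md_output(yolo_json_file='yolo-run/val_predictions.json',
                                  image_folder='yolo-run/images',
                                  output_file='yolo-run/md_results.json',
                                  yolo_category_id_to_name=category_id_to_name,
                                  detector_name='yolov5-custom',
                                  offset_yolo_class_ids=True,
                                  truncate_to_standard_md_precision=True)

    # For detect.py-style per-image .txt output, yolo_txt_output_to_md_output
    # (with its hard-coded animal/person/vehicle mapping) is the counterpart.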