megadetector 5.0.11__py3-none-any.whl → 5.0.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (203) hide show
  1. megadetector/api/__init__.py +0 -0
  2. megadetector/api/batch_processing/__init__.py +0 -0
  3. megadetector/api/batch_processing/api_core/__init__.py +0 -0
  4. megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. megadetector/api/batch_processing/api_core/batch_service/score.py +439 -0
  6. megadetector/api/batch_processing/api_core/server.py +294 -0
  7. megadetector/api/batch_processing/api_core/server_api_config.py +97 -0
  8. megadetector/api/batch_processing/api_core/server_app_config.py +55 -0
  9. megadetector/api/batch_processing/api_core/server_batch_job_manager.py +220 -0
  10. megadetector/api/batch_processing/api_core/server_job_status_table.py +149 -0
  11. megadetector/api/batch_processing/api_core/server_orchestration.py +360 -0
  12. megadetector/api/batch_processing/api_core/server_utils.py +88 -0
  13. megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
  14. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +46 -0
  15. megadetector/api/batch_processing/api_support/__init__.py +0 -0
  16. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +152 -0
  17. megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
  18. megadetector/api/batch_processing/integration/digiKam/setup.py +6 -0
  19. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +465 -0
  20. megadetector/api/batch_processing/integration/eMammal/test_scripts/config_template.py +5 -0
  21. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +125 -0
  22. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +55 -0
  23. megadetector/api/synchronous/__init__.py +0 -0
  24. megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  25. megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +152 -0
  26. megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +263 -0
  27. megadetector/api/synchronous/api_core/animal_detection_api/config.py +35 -0
  28. megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
  29. megadetector/api/synchronous/api_core/tests/load_test.py +110 -0
  30. megadetector/classification/__init__.py +0 -0
  31. megadetector/classification/aggregate_classifier_probs.py +108 -0
  32. megadetector/classification/analyze_failed_images.py +227 -0
  33. megadetector/classification/cache_batchapi_outputs.py +198 -0
  34. megadetector/classification/create_classification_dataset.py +627 -0
  35. megadetector/classification/crop_detections.py +516 -0
  36. megadetector/classification/csv_to_json.py +226 -0
  37. megadetector/classification/detect_and_crop.py +855 -0
  38. megadetector/classification/efficientnet/__init__.py +9 -0
  39. megadetector/classification/efficientnet/model.py +415 -0
  40. megadetector/classification/efficientnet/utils.py +607 -0
  41. megadetector/classification/evaluate_model.py +520 -0
  42. megadetector/classification/identify_mislabeled_candidates.py +152 -0
  43. megadetector/classification/json_to_azcopy_list.py +63 -0
  44. megadetector/classification/json_validator.py +699 -0
  45. megadetector/classification/map_classification_categories.py +276 -0
  46. megadetector/classification/merge_classification_detection_output.py +506 -0
  47. megadetector/classification/prepare_classification_script.py +194 -0
  48. megadetector/classification/prepare_classification_script_mc.py +228 -0
  49. megadetector/classification/run_classifier.py +287 -0
  50. megadetector/classification/save_mislabeled.py +110 -0
  51. megadetector/classification/train_classifier.py +827 -0
  52. megadetector/classification/train_classifier_tf.py +725 -0
  53. megadetector/classification/train_utils.py +323 -0
  54. megadetector/data_management/__init__.py +0 -0
  55. megadetector/data_management/annotations/__init__.py +0 -0
  56. megadetector/data_management/annotations/annotation_constants.py +34 -0
  57. megadetector/data_management/camtrap_dp_to_coco.py +237 -0
  58. megadetector/data_management/cct_json_utils.py +404 -0
  59. megadetector/data_management/cct_to_md.py +176 -0
  60. megadetector/data_management/cct_to_wi.py +289 -0
  61. megadetector/data_management/coco_to_labelme.py +283 -0
  62. megadetector/data_management/coco_to_yolo.py +662 -0
  63. megadetector/data_management/databases/__init__.py +0 -0
  64. megadetector/data_management/databases/add_width_and_height_to_db.py +33 -0
  65. megadetector/data_management/databases/combine_coco_camera_traps_files.py +206 -0
  66. megadetector/data_management/databases/integrity_check_json_db.py +493 -0
  67. megadetector/data_management/databases/subset_json_db.py +115 -0
  68. megadetector/data_management/generate_crops_from_cct.py +149 -0
  69. megadetector/data_management/get_image_sizes.py +189 -0
  70. megadetector/data_management/importers/add_nacti_sizes.py +52 -0
  71. megadetector/data_management/importers/add_timestamps_to_icct.py +79 -0
  72. megadetector/data_management/importers/animl_results_to_md_results.py +158 -0
  73. megadetector/data_management/importers/auckland_doc_test_to_json.py +373 -0
  74. megadetector/data_management/importers/auckland_doc_to_json.py +201 -0
  75. megadetector/data_management/importers/awc_to_json.py +191 -0
  76. megadetector/data_management/importers/bellevue_to_json.py +273 -0
  77. megadetector/data_management/importers/cacophony-thermal-importer.py +793 -0
  78. megadetector/data_management/importers/carrizo_shrubfree_2018.py +269 -0
  79. megadetector/data_management/importers/carrizo_trail_cam_2017.py +289 -0
  80. megadetector/data_management/importers/cct_field_adjustments.py +58 -0
  81. megadetector/data_management/importers/channel_islands_to_cct.py +913 -0
  82. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +180 -0
  83. megadetector/data_management/importers/eMammal/eMammal_helpers.py +249 -0
  84. megadetector/data_management/importers/eMammal/make_eMammal_json.py +223 -0
  85. megadetector/data_management/importers/ena24_to_json.py +276 -0
  86. megadetector/data_management/importers/filenames_to_json.py +386 -0
  87. megadetector/data_management/importers/helena_to_cct.py +283 -0
  88. megadetector/data_management/importers/idaho-camera-traps.py +1407 -0
  89. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +294 -0
  90. megadetector/data_management/importers/jb_csv_to_json.py +150 -0
  91. megadetector/data_management/importers/mcgill_to_json.py +250 -0
  92. megadetector/data_management/importers/missouri_to_json.py +490 -0
  93. megadetector/data_management/importers/nacti_fieldname_adjustments.py +79 -0
  94. megadetector/data_management/importers/noaa_seals_2019.py +181 -0
  95. megadetector/data_management/importers/pc_to_json.py +365 -0
  96. megadetector/data_management/importers/plot_wni_giraffes.py +123 -0
  97. megadetector/data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -0
  98. megadetector/data_management/importers/prepare_zsl_imerit.py +131 -0
  99. megadetector/data_management/importers/rspb_to_json.py +356 -0
  100. megadetector/data_management/importers/save_the_elephants_survey_A.py +320 -0
  101. megadetector/data_management/importers/save_the_elephants_survey_B.py +329 -0
  102. megadetector/data_management/importers/snapshot_safari_importer.py +758 -0
  103. megadetector/data_management/importers/snapshot_safari_importer_reprise.py +665 -0
  104. megadetector/data_management/importers/snapshot_serengeti_lila.py +1067 -0
  105. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +150 -0
  106. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +153 -0
  107. megadetector/data_management/importers/sulross_get_exif.py +65 -0
  108. megadetector/data_management/importers/timelapse_csv_set_to_json.py +490 -0
  109. megadetector/data_management/importers/ubc_to_json.py +399 -0
  110. megadetector/data_management/importers/umn_to_json.py +507 -0
  111. megadetector/data_management/importers/wellington_to_json.py +263 -0
  112. megadetector/data_management/importers/wi_to_json.py +442 -0
  113. megadetector/data_management/importers/zamba_results_to_md_results.py +181 -0
  114. megadetector/data_management/labelme_to_coco.py +547 -0
  115. megadetector/data_management/labelme_to_yolo.py +272 -0
  116. megadetector/data_management/lila/__init__.py +0 -0
  117. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +97 -0
  118. megadetector/data_management/lila/add_locations_to_nacti.py +147 -0
  119. megadetector/data_management/lila/create_lila_blank_set.py +558 -0
  120. megadetector/data_management/lila/create_lila_test_set.py +152 -0
  121. megadetector/data_management/lila/create_links_to_md_results_files.py +106 -0
  122. megadetector/data_management/lila/download_lila_subset.py +178 -0
  123. megadetector/data_management/lila/generate_lila_per_image_labels.py +516 -0
  124. megadetector/data_management/lila/get_lila_annotation_counts.py +170 -0
  125. megadetector/data_management/lila/get_lila_image_counts.py +112 -0
  126. megadetector/data_management/lila/lila_common.py +300 -0
  127. megadetector/data_management/lila/test_lila_metadata_urls.py +132 -0
  128. megadetector/data_management/ocr_tools.py +870 -0
  129. megadetector/data_management/read_exif.py +809 -0
  130. megadetector/data_management/remap_coco_categories.py +84 -0
  131. megadetector/data_management/remove_exif.py +66 -0
  132. megadetector/data_management/rename_images.py +187 -0
  133. megadetector/data_management/resize_coco_dataset.py +189 -0
  134. megadetector/data_management/wi_download_csv_to_coco.py +247 -0
  135. megadetector/data_management/yolo_output_to_md_output.py +446 -0
  136. megadetector/data_management/yolo_to_coco.py +676 -0
  137. megadetector/detection/__init__.py +0 -0
  138. megadetector/detection/detector_training/__init__.py +0 -0
  139. megadetector/detection/detector_training/model_main_tf2.py +114 -0
  140. megadetector/detection/process_video.py +846 -0
  141. megadetector/detection/pytorch_detector.py +355 -0
  142. megadetector/detection/run_detector.py +779 -0
  143. megadetector/detection/run_detector_batch.py +1219 -0
  144. megadetector/detection/run_inference_with_yolov5_val.py +1087 -0
  145. megadetector/detection/run_tiled_inference.py +934 -0
  146. megadetector/detection/tf_detector.py +192 -0
  147. megadetector/detection/video_utils.py +698 -0
  148. megadetector/postprocessing/__init__.py +0 -0
  149. megadetector/postprocessing/add_max_conf.py +64 -0
  150. megadetector/postprocessing/categorize_detections_by_size.py +165 -0
  151. megadetector/postprocessing/classification_postprocessing.py +716 -0
  152. megadetector/postprocessing/combine_api_outputs.py +249 -0
  153. megadetector/postprocessing/compare_batch_results.py +966 -0
  154. megadetector/postprocessing/convert_output_format.py +396 -0
  155. megadetector/postprocessing/load_api_results.py +195 -0
  156. megadetector/postprocessing/md_to_coco.py +310 -0
  157. megadetector/postprocessing/md_to_labelme.py +330 -0
  158. megadetector/postprocessing/merge_detections.py +412 -0
  159. megadetector/postprocessing/postprocess_batch_results.py +1908 -0
  160. megadetector/postprocessing/remap_detection_categories.py +170 -0
  161. megadetector/postprocessing/render_detection_confusion_matrix.py +660 -0
  162. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +211 -0
  163. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +83 -0
  164. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +1635 -0
  165. megadetector/postprocessing/separate_detections_into_folders.py +730 -0
  166. megadetector/postprocessing/subset_json_detector_output.py +700 -0
  167. megadetector/postprocessing/top_folders_to_bottom.py +223 -0
  168. megadetector/taxonomy_mapping/__init__.py +0 -0
  169. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +491 -0
  170. megadetector/taxonomy_mapping/map_new_lila_datasets.py +150 -0
  171. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -0
  172. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +588 -0
  173. megadetector/taxonomy_mapping/retrieve_sample_image.py +71 -0
  174. megadetector/taxonomy_mapping/simple_image_download.py +219 -0
  175. megadetector/taxonomy_mapping/species_lookup.py +834 -0
  176. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +159 -0
  177. megadetector/taxonomy_mapping/taxonomy_graph.py +346 -0
  178. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +83 -0
  179. megadetector/utils/__init__.py +0 -0
  180. megadetector/utils/azure_utils.py +178 -0
  181. megadetector/utils/ct_utils.py +613 -0
  182. megadetector/utils/directory_listing.py +246 -0
  183. megadetector/utils/md_tests.py +1164 -0
  184. megadetector/utils/path_utils.py +1045 -0
  185. megadetector/utils/process_utils.py +160 -0
  186. megadetector/utils/sas_blob_utils.py +509 -0
  187. megadetector/utils/split_locations_into_train_val.py +228 -0
  188. megadetector/utils/string_utils.py +92 -0
  189. megadetector/utils/url_utils.py +323 -0
  190. megadetector/utils/write_html_image_list.py +225 -0
  191. megadetector/visualization/__init__.py +0 -0
  192. megadetector/visualization/plot_utils.py +293 -0
  193. megadetector/visualization/render_images_with_thumbnails.py +275 -0
  194. megadetector/visualization/visualization_utils.py +1536 -0
  195. megadetector/visualization/visualize_db.py +552 -0
  196. megadetector/visualization/visualize_detector_output.py +405 -0
  197. {megadetector-5.0.11.dist-info → megadetector-5.0.13.dist-info}/LICENSE +0 -0
  198. {megadetector-5.0.11.dist-info → megadetector-5.0.13.dist-info}/METADATA +2 -2
  199. megadetector-5.0.13.dist-info/RECORD +201 -0
  200. megadetector-5.0.13.dist-info/top_level.txt +1 -0
  201. megadetector-5.0.11.dist-info/RECORD +0 -5
  202. megadetector-5.0.11.dist-info/top_level.txt +0 -1
  203. {megadetector-5.0.11.dist-info → megadetector-5.0.13.dist-info}/WHEEL +0 -0
@@ -0,0 +1,247 @@
1
+ """
2
+
3
+ wi_download_csv_to_coco.py
4
+
5
+ Converts a .csv file from a Wildlife Insights project export to a COCO camera traps .json file.
6
+
7
+ Currently assumes that common names are unique identifiers, which is convenient but unreliable.
8
+
9
+ """
10
+
11
+ #%% Imports and constants
12
+
13
+ import os
14
+ import json
15
+ import pandas as pd
16
+
17
+ from tqdm import tqdm
18
+ from collections import defaultdict
19
+
20
+ from megadetector.visualization import visualization_utils as vis_utils
21
+ from megadetector.utils.ct_utils import isnan
22
+
23
# Columns of the WI export that wi_download_csv_to_coco() copies into each
# annotation's 'wi_extra_info' dict (values that are NaN are left out).
wi_extra_annotation_columns = (
    'is_blank',
    'identified_by',
    'wi_taxon_id',
    'class',
    'order',
    'family',
    'genus',
    'species',
    'uncertainty',
    'number_of_objects',
    'age',
    'sex',
    'animal_recognizable',
    'individual_id',
    'individual_animal_notes',
    'behavior',
    'highlighted',
    'markings',
)

# Columns of the WI export that wi_download_csv_to_coco() copies (as strings)
# into each image's 'wi_image_info' dict.
wi_extra_image_columns = (
    'project_id',
    'deployment_id',
)
29
+
30
+ def _make_location_id(project_id,deployment_id):
31
+ return 'project_' + str(project_id) + '_deployment_' + deployment_id
32
+
33
# Default mapping from WI category names (the 'common_name' column) to output
# category names; used as the default value of the [category_remappings]
# argument to wi_download_csv_to_coco().  Names not listed here are unchanged.
default_category_remappings = {}
default_category_remappings['Homo Species'] = 'Human'
default_category_remappings['Human-Camera Trapper'] = 'Human'
default_category_remappings['No CV Result'] = 'Unknown'
38
+
39
+
40
+ #%% Main function
41
+
42
def wi_download_csv_to_coco(csv_file_in,
                            coco_file_out=None,
                            image_folder=None,
                            validate_images=False,
                            gs_prefix=None,
                            verbose=True,
                            category_remappings=default_category_remappings):
    """
    Converts a .csv file from a Wildlife Insights project export to a COCO
    Camera Traps .json file.

    Every row in the .csv file becomes one annotation; rows that share an 'image_id'
    are attached to the same image record.  The category called 'Blank' in the
    export is written as 'empty' (category ID 0); all other categories are assigned
    IDs in order of first appearance.

    Args:
        csv_file_in (str): the downloaded .csv file we should convert to COCO
        coco_file_out (str, optional): the .json file we should write; if [coco_file_out] is None,
            uses [csv_file_in].json
        image_folder (str, optional): the folder where images live; required if
            [validate_images] is True.  If supplied, it is also used as the base folder
            for the image-existence check in the final integrity check; if it is None,
            that existence check is skipped.
        validate_images (bool, optional): whether to check images for corruption and load
            image sizes; if this is True, [image_folder] must be a valid folder
        gs_prefix (str, optional): a string to remove from GS URLs to convert to path names...
            for example, if your gs:// URLs look like:

            `gs://11234134_xyz/deployment/55554/dfadfasdfs.jpg`

            ...and you specify gs_prefix='11234134_xyz/deployment/', the filenames in
            the .json file will look like:

            `55554/dfadfasdfs.jpg`
        verbose (bool, optional): enable additional debug console output
        category_remappings (dict, optional): str --> str dict that maps any number of
            WI category names to output category names; for example defaults to mapping
            "Homo Species" to "Human", but leaves 99.99% of categories unchanged.

    Returns:
        dict: COCO-formatted data, identical to what's written to [coco_file_out]
    """

    ##%% Create COCO dictionaries

    # 'empty' is always category 0; everything else is numbered as it is encountered
    category_name_to_id = {}
    category_name_to_id['empty'] = 0

    df = pd.read_csv(csv_file_in)

    print('Read {} rows from {}'.format(len(df),csv_file_in))

    image_id_to_image = {}
    image_id_to_annotations = defaultdict(list)

    # row = df.iloc[0]
    for _,row in df.iterrows():

        image_id = row['image_id']
        location_id = _make_location_id(row['project_id'],row['deployment_id'])

        if image_id not in image_id_to_image:

            # First time we've seen this image: create the image record

            im = {}
            image_id_to_image[image_id] = im

            im['id'] = image_id

            gs_url = row['location']
            assert gs_url.startswith('gs://')

            file_name = gs_url.replace('gs://','')
            if gs_prefix is not None:
                file_name = file_name.replace(gs_prefix,'')

            im['file_name'] = file_name
            im['location'] = location_id
            im['datetime'] = row['timestamp']

            im['wi_image_info'] = {}
            for s in wi_extra_image_columns:
                im['wi_image_info'][s] = str(row[s])

        else:

            # We've seen this image before: image-level fields on this row have to
            # agree with what we stored the first time.

            im = image_id_to_image[image_id]
            assert im['datetime'] == row['timestamp']
            assert im['location'] == location_id

        category_name = row['common_name']
        if category_remappings is not None and category_name in category_remappings:
            category_name = category_remappings[category_name]

        # The 'Blank' category and the 'is_blank' column are expected to agree
        if category_name == 'Blank':
            category_name = 'empty'
            assert row['is_blank'] == 1
        else:
            assert row['is_blank'] == 0
        assert isinstance(category_name,str)

        if category_name in category_name_to_id:
            category_id = category_name_to_id[category_name]
        else:
            category_id = len(category_name_to_id)
            category_name_to_id[category_name] = category_id

        annotations_this_image = image_id_to_annotations[image_id]
        annotation_number = len(annotations_this_image)

        ann = {}
        ann['image_id'] = image_id
        # Annotation IDs are [image_id]_[two-digit index within the image]; str() keeps
        # this from failing if the image ID column was parsed as a number.
        ann['id'] = str(image_id) + '_' + str(annotation_number).zfill(2)
        ann['category_id'] = category_id
        annotations_this_image.append(ann)

        # Carry over the WI-specific columns that have a value on this row
        extra_info = {}
        for s in wi_extra_annotation_columns:
            v = row[s]
            if not isnan(v):
                extra_info[s] = v
        ann['wi_extra_info'] = extra_info

    # ...for each row

    images = list(image_id_to_image.values())

    categories = [{'id':category_id,'name':category_name} \
                  for category_name,category_id in category_name_to_id.items()]

    annotations = [ann for annotations_this_image in image_id_to_annotations.values() \
                   for ann in annotations_this_image]

    info = {'version':'1.00','description':'converted from WI export'}
    info['source_file'] = csv_file_in

    coco_data = {}
    coco_data['info'] = info
    coco_data['images'] = images
    coco_data['annotations'] = annotations
    coco_data['categories'] = categories


    ##%% Validate images, add sizes

    if validate_images:

        print('Validating images')

        # Test for None explicitly, so that a missing folder produces this message
        # rather than an error raised from inside os.path.isdir().
        assert image_folder is not None and os.path.isdir(image_folder), \
            'Must specify a valid image folder if you specify validate_images=True'

        # TODO: trivially parallelizable
        #
        # im = images[0]
        for im in tqdm(images):

            file_name_relative = im['file_name']
            file_name_abs = os.path.join(image_folder,file_name_relative)
            assert os.path.isfile(file_name_abs), \
                'Could not find image file {}'.format(file_name_abs)

            # Images that exist but can't be loaded are flagged as corrupt, and
            # get no width/height fields.
            im['corrupt'] = False
            try:
                pil_im = vis_utils.load_image(file_name_abs)
            except Exception:
                im['corrupt'] = True
            if not im['corrupt']:
                im['width'] = pil_im.width
                im['height'] = pil_im.height


    ##%% Write output json

    if coco_file_out is None:
        coco_file_out = csv_file_in + '.json'

    with open(coco_file_out,'w') as f:
        json.dump(coco_data,f,indent=1)


    ##%% Validate output

    from megadetector.data_management.databases.integrity_check_json_db import \
        IntegrityCheckOptions,integrity_check_json_db
    options = IntegrityCheckOptions()

    # Only ask for an image-existence check when we know where the images are;
    # [image_folder] is optional unless [validate_images] is True.
    #
    # NOTE(review): when no folder is supplied, baseDir is left at whatever
    # IntegrityCheckOptions defaults to - confirm that default is acceptable.
    if image_folder is not None:
        options.baseDir = image_folder
        options.bCheckImageExistence = True
    else:
        options.bCheckImageExistence = False
    options.verbose = verbose
    _ = integrity_check_json_db(coco_file_out,options)

    return coco_data
225
+
226
+ # ...def wi_download_csv_to_coco(...)
227
+
228
+
229
+ #%% Interactive driver
230
+
231
if False:

    #%%

    # Placeholder locations for interactive experimentation
    base_folder = r'a/b/c'
    image_folder = os.path.join(base_folder,'images')
    csv_file_in = os.path.join(base_folder,'images.csv')

    # Values named after the remaining arguments of wi_download_csv_to_coco()
    gs_prefix = 'a_b_c_main/'
    coco_file_out = None
    category_remappings = default_category_remappings
    verbose = True
    validate_images = False
243
+
244
+
245
+ #%% Command-line driver
246
+
247
+ # TODO
@@ -0,0 +1,446 @@
1
+ """
2
+
3
+ yolo_output_to_md_output.py
4
+
5
+ Converts the output of YOLOv5's detect.py or val.py to the MD API output format.
6
+
7
+ **Converting .txt files**
8
+
9
+ detect.py writes a .txt file per image, in YOLO training format. Converting from this
10
+ format does not currently support recursive results, since detect.py doesn't save filenames
11
+ in a way that allows easy inference of folder names. Requires access to the input
12
+ images, because the YOLO format uses the *absence* of a results file to indicate that
13
+ no detections are present.
14
+
15
+ YOLOv5 output has one text file per image, like so:
16
+
17
+ 0 0.0141693 0.469758 0.0283385 0.131552 0.761428
18
+
19
+ That's [class, x_center, y_center, width_of_box, height_of_box, confidence]
20
+
21
+ val.py can write in this format as well, using the --save-txt argument.
22
+
23
+ In both cases, a confidence value is only written to each line if you include the --save-conf
24
+ argument. Confidence values are required by this conversion script.
25
+
26
+
27
+ **Converting .json files**
28
+
29
+ val.py can also write a .json file in COCO-ish format. It's "COCO-ish" because it's
30
+ just the "images" portion of a COCO .json file.
31
+
32
+ Converting from this format also requires access to the original images, since the format
33
+ written by YOLOv5 uses absolute coordinates, but MD results are in relative coordinates.
34
+
35
+ """
36
+
37
+ #%% Imports and constants
38
+
39
+ import json
40
+ import csv
41
+ import os
42
+ import re
43
+
44
+ from collections import defaultdict
45
+ from tqdm import tqdm
46
+
47
+ from megadetector.utils import path_utils
48
+ from megadetector.utils import ct_utils
49
+ from megadetector.visualization import visualization_utils as vis_utils
50
+ from megadetector.detection.run_detector import CONF_DIGITS, COORD_DIGITS
51
+
52
+
53
+ #%% Support functions
54
+
55
def read_classes_from_yolo_dataset_file(fn):
    """
    Reads a dictionary mapping integer class IDs to class names from a YOLOv5/YOLOv8
    dataset.yaml file or a .json file.  A .json file should contain a dictionary mapping
    integer category IDs to string category names.

    .yml/.yaml files are not parsed as YAML; instead, every line of the form
    "[integer]:[name]" (optionally indented) is treated as a class entry, and all
    other lines are ignored.  Names are returned as written, with surrounding
    whitespace removed (quotes, if any, are not removed).

    Args:
        fn (str): YOLOv5/YOLOv8 dataset file with a .yml or .yaml extension, or a .json file
            mapping integer category IDs to category names.

    Returns:
        dict: a mapping from integer category IDs to category names

    Raises:
        ValueError: if [fn] does not end in .yml, .yaml, or .json
        AssertionError: if no class mappings could be read from [fn]
    """

    if fn.endswith(('.yml','.yaml')):

        # Anchored at the start of the line, so lines that merely contain "digits:"
        # somewhere in a value (e.g. a path or a comment) are skipped rather than
        # being mistaken for class entries.  Everything after the first colon is the
        # name, so names that themselves contain a colon are accepted.
        class_line_pattern = re.compile(r'^\s*(\d+):(.+)$')

        category_id_to_name = {}
        with open(fn,'r') as f:
            for line in f:
                m = class_line_pattern.match(line)
                if m is not None:
                    category_id_to_name[int(m.group(1))] = m.group(2).strip()

    elif fn.endswith('.json'):

        with open(fn,'r') as f:
            d_in = json.load(f)

        # .json keys are always strings; convert them to the integer IDs we return
        category_id_to_name = {int(k):v for k,v in d_in.items()}

    else:

        raise ValueError('Unrecognized category file type: {}'.format(fn))

    assert len(category_id_to_name) > 0, 'Failed to read class mappings from {}'.format(fn)

    return category_id_to_name
97
+
98
+
99
+ def yolo_json_output_to_md_output(yolo_json_file,
100
+ image_folder,
101
+ output_file,
102
+ yolo_category_id_to_name,
103
+ detector_name='unknown',
104
+ image_id_to_relative_path=None,
105
+ offset_yolo_class_ids=True,
106
+ truncate_to_standard_md_precision=True,
107
+ image_id_to_error=None,
108
+ convert_slashes=True):
109
+ """
110
+ Converts a YOLOv5/YOLOv8 .json file to MD .json format.
111
+
112
+ Args:
113
+
114
+ yolo_json_file (str): the .json file to convert from YOLOv5 format to MD output format
115
+ image_folder (str): the .json file contains relative path names, this is the path base
116
+ yolo_category_id_to_name (str or dict): the .json results file contains only numeric
117
+ identifiers for categories, but we want names and numbers for the output format;
118
+ yolo_category_id_to_name provides that mapping either as a dict or as a YOLOv5
119
+ dataset.yaml file.
120
+ detector_name (str, optional): a string that gets put in the output file, not otherwise
121
+ used within this function
122
+ image_id_to_relative_path (dict, optional): YOLOv5 .json uses only basenames (e.g.
123
+ abc1234.JPG); by default these will be appended to the input path to create pathnames.
124
+ If you have a flat folder, this is fine. If you want to map base names to relative paths in
125
+ a more complicated way, use this parameter.
126
+ offset_yolo_class_ids (bool, optional): YOLOv5 class IDs always start at zero; if you want to
127
+ make the output classes start at 1, set offset_yolo_class_ids to True.
128
+ truncate_to_standard_md_precision (bool, optional): YOLOv5 .json includes lots of
129
+ (not-super-meaningful) precision, set this to truncate to COORD_DIGITS and CONF_DIGITS.
130
+ image_id_to_error (dict, optional): if you want to include image IDs in the output file for which
131
+ you couldn't prepare the input file in the first place due to errors, include them here.
132
+ convert_slashes (bool, optional): force all slashes to be forward slashes in the output file
133
+ """
134
+
135
+ assert os.path.isfile(yolo_json_file), \
136
+ 'Could not find YOLO .json file {}'.format(yolo_json_file)
137
+ assert os.path.isdir(image_folder), \
138
+ 'Could not find image folder {}'.format(image_folder)
139
+
140
+ if image_id_to_error is None:
141
+ image_id_to_error = {}
142
+
143
+ print('Converting {} to MD format and writing results to {}'.format(
144
+ yolo_json_file,output_file))
145
+
146
+ if isinstance(yolo_category_id_to_name,str):
147
+ assert os.path.isfile(yolo_category_id_to_name), \
148
+ 'YOLO category mapping specified as a string, but file does not exist: {}'.format(
149
+ yolo_category_id_to_name)
150
+ yolo_category_id_to_name = read_classes_from_yolo_dataset_file(yolo_category_id_to_name)
151
+
152
+ if image_id_to_relative_path is None:
153
+
154
+ image_files = path_utils.find_images(image_folder,recursive=True)
155
+ image_files = [os.path.relpath(fn,image_folder) for fn in image_files]
156
+
157
+ # YOLOv5 identifies images in .json output by ID, which is the filename without
158
+ # extension. If a mapping is not provided, these need to be unique.
159
+ image_id_to_relative_path = {}
160
+
161
+ for fn in image_files:
162
+ image_id = os.path.splitext(os.path.basename(fn))[0]
163
+ if image_id in image_id_to_relative_path:
164
+ print('Error: image ID {} refers to:\n{}\n{}'.format(
165
+ image_id,image_id_to_relative_path[image_id],fn))
166
+ raise ValueError('Duplicate image ID {}'.format(image_id))
167
+ image_id_to_relative_path[image_id] = fn
168
+
169
+ image_files_relative = sorted(list(image_id_to_relative_path.values()))
170
+
171
+ image_file_relative_to_image_id = {}
172
+ for image_id in image_id_to_relative_path:
173
+ relative_path = image_id_to_relative_path[image_id]
174
+ assert relative_path not in image_file_relative_to_image_id, \
175
+ 'Duplication image IDs in YOLO output conversion for image {}'.format(relative_path)
176
+ image_file_relative_to_image_id[relative_path] = image_id
177
+
178
+ with open(yolo_json_file,'r') as f:
179
+ detections = json.load(f)
180
+ assert isinstance(detections,list)
181
+
182
+ image_id_to_detections = defaultdict(list)
183
+
184
+ int_formatted_image_ids = False
185
+
186
+ # det = detections[0]
187
+ for det in detections:
188
+
189
+ # This could be a string, but if the YOLOv5 inference script sees that the strings
190
+ # are really ints, it converts to ints.
191
+ image_id = det['image_id']
192
+ image_id_to_detections[image_id].append(det)
193
+ if isinstance(image_id,int):
194
+ int_formatted_image_ids = True
195
+
196
+ # If there are any ints present, everything should be ints
197
+ if int_formatted_image_ids:
198
+ for det in detections:
199
+ assert isinstance(det['image_id'],int), \
200
+ 'Found mixed int and string image IDs'
201
+
202
+ # Convert the keys in image_id_to_error to ints
203
+ #
204
+ # This should error if we're given non-int-friendly IDs
205
+ int_formatted_image_id_to_error = {}
206
+ for image_id in image_id_to_error:
207
+ int_formatted_image_id_to_error[int(image_id)] = \
208
+ image_id_to_error[image_id]
209
+ image_id_to_error = int_formatted_image_id_to_error
210
+
211
+ # ...if image IDs are formatted as integers in YOLO output
212
+
213
+ # In a modified version of val.py, we use negative category IDs to indicate an error
214
+ # that happened during inference (typically truncated images with valid headers,
215
+ # so corruption was not detected during val.py's initial corruption check pass).
216
+ for det in detections:
217
+ if det['category_id'] < 0:
218
+ assert 'error' in det, 'Negative category ID present with no error string'
219
+ error_string = det['error']
220
+ print('Caught inference-time failure {} for image {}'.format(error_string,det['image_id']))
221
+ image_id_to_error[det['image_id']] = error_string
222
+
223
+ output_images = []
224
+
225
+ # image_file_relative = image_files_relative[10]
226
+ for image_file_relative in tqdm(image_files_relative):
227
+
228
+ im = {}
229
+ im['file'] = image_file_relative
230
+ if convert_slashes:
231
+ im['file'] = im['file'].replace('\\','/')
232
+
233
+ image_id = image_file_relative_to_image_id[image_file_relative]
234
+ if int_formatted_image_ids:
235
+ image_id = int(image_id)
236
+ if image_id in image_id_to_error:
237
+ im['failure'] = str(image_id_to_error[image_id])
238
+ output_images.append(im)
239
+ continue
240
+ elif image_id not in image_id_to_detections:
241
+ detections = []
242
+ else:
243
+ detections = image_id_to_detections[image_id]
244
+
245
+ image_full_path = os.path.join(image_folder,image_file_relative)
246
+ try:
247
+ pil_im = vis_utils.open_image(image_full_path)
248
+ except Exception as e:
249
+ s = str(e).replace('\n',' ')
250
+ print('Warning: error opening image {}: {}, outputting as a failure'.format(image_full_path,s))
251
+ im['failure'] = 'Conversion error: {}'.format(s)
252
+ output_images.append(im)
253
+ continue
254
+
255
+ im['detections'] = []
256
+
257
+ image_w = pil_im.size[0]
258
+ image_h = pil_im.size[1]
259
+
260
+ # det = detections[0]
261
+ for det in detections:
262
+
263
+ output_det = {}
264
+
265
+ yolo_cat_id = int(det['category_id'])
266
+ if offset_yolo_class_ids:
267
+ yolo_cat_id += 1
268
+ output_det['category'] = str(int(yolo_cat_id))
269
+ conf = det['score']
270
+ if truncate_to_standard_md_precision:
271
+ conf = ct_utils.truncate_float(conf,CONF_DIGITS)
272
+ output_det['conf'] = conf
273
+ input_bbox = det['bbox']
274
+
275
+ # YOLO's COCO .json is not *that* COCO-like, but it is COCO-like in
276
+ # that the boxes are already [xmin/ymin/w/h]
277
+ box_xmin_absolute = input_bbox[0]
278
+ box_ymin_absolute = input_bbox[1]
279
+ box_width_absolute = input_bbox[2]
280
+ box_height_absolute = input_bbox[3]
281
+
282
+ box_xmin_relative = box_xmin_absolute / image_w
283
+ box_ymin_relative = box_ymin_absolute / image_h
284
+ box_width_relative = box_width_absolute / image_w
285
+ box_height_relative = box_height_absolute / image_h
286
+
287
+ output_bbox = [box_xmin_relative,box_ymin_relative,
288
+ box_width_relative,box_height_relative]
289
+
290
+ if truncate_to_standard_md_precision:
291
+ output_bbox = ct_utils.truncate_float_array(output_bbox,COORD_DIGITS)
292
+
293
+ output_det['bbox'] = output_bbox
294
+ im['detections'].append(output_det)
295
+
296
+ # ...for each detection
297
+
298
+ output_images.append(im)
299
+
300
+ # ...for each image file
301
+
302
+ d = {}
303
+ d['images'] = output_images
304
+ d['info'] = {'format_version':1.3,'detector':detector_name}
305
+ d['detection_categories'] = {}
306
+
307
+ for cat_id in yolo_category_id_to_name:
308
+ yolo_cat_id = int(cat_id)
309
+ if offset_yolo_class_ids:
310
+ yolo_cat_id += 1
311
+ d['detection_categories'][str(yolo_cat_id)] = yolo_category_id_to_name[cat_id]
312
+
313
+ with open(output_file,'w') as f:
314
+ json.dump(d,f,indent=1)
315
+
316
+ # ...def yolo_json_output_to_md_output(...)
317
+
318
+
319
def yolo_txt_output_to_md_output(input_results_folder,
                                 image_folder,
                                 output_file,
                                 detector_tag=None):
    """
    Converts a folder of YOLO-output .txt files to MD .json format.

    Less finished than the .json conversion function; this .txt conversion assumes
    a hard-coded mapping representing the standard MD categories (in MD indexing,
    1/2/3=animal/person/vehicle; in YOLO indexing, 0/1/2=animal/person/vehicle).

    Every image found (non-recursively) in [image_folder] gets an entry in the output;
    an image with no matching .txt file is written with an empty detection list.
    Each non-blank results line must contain at least six space-separated fields:
    a YOLO class ID, four box values (handed to ct_utils.convert_yolo_to_xywh), and
    a confidence value.  Blank lines and runs of repeated spaces are tolerated.

    Args:
        input_results_folder (str): the folder containing YOLO-output .txt files
        image_folder (str): the folder where images live, may be the same as
            [input_results_folder]
        output_file (str): the MD-formatted .json file to which we should write
            results
        detector_tag (str, optional): string to put in the 'detector' field in the
            output file

    Raises:
        AssertionError: if either folder does not exist, if a results file has no
            corresponding image, or if a results line has fewer than six fields
        KeyError: if a results line uses a class ID other than 0, 1, or 2
    """

    assert os.path.isdir(input_results_folder), \
        'Results folder {} does not exist'.format(input_results_folder)
    assert os.path.isdir(image_folder), \
        'Image folder {} does not exist'.format(image_folder)

    ## Enumerate results files and image files

    yolo_results_files = [fn for fn in os.listdir(input_results_folder)
                          if fn.lower().endswith('.txt')]

    image_files = path_utils.find_images(image_folder,recursive=False)
    image_files_relative = [os.path.basename(fn) for fn in image_files]

    # A set keeps the per-results-file membership test below O(1)
    image_names_no_extension = set(os.path.splitext(fn)[0] for fn in image_files_relative)

    ## Make sure that every results file corresponds to an image

    for results_fn in yolo_results_files:
        result_no_extension = os.path.splitext(results_fn)[0]
        assert result_no_extension in image_names_no_extension, \
            'Results file {} does not correspond to any image in {}'.format(
                results_fn,image_folder)

    ## Build MD output data

    # Map 0-indexed YOLO categories to 1-indexed MD categories
    yolo_cat_map = { 0: 1, 1: 2, 2: 3 }

    images_entries = []

    for image_fn in image_files_relative:

        image_name = os.path.splitext(image_fn)[0]
        label_path = os.path.join(input_results_folder, image_name + '.txt')

        detections = []

        # An image with no results file is assumed to be an image with no detections
        if os.path.exists(label_path):

            with open(label_path, newline='') as f:

                reader = csv.reader(f, delimiter=' ')

                for row in reader:

                    # Splitting on single spaces yields empty tokens for blank lines
                    # and for runs of spaces; discard them so field positions are stable.
                    fields = [token for token in row if token.strip()]
                    if not fields:
                        continue

                    assert len(fields) >= 6, \
                        'Expected class, four box values, and confidence in {}, found {} fields'.format(
                            label_path,len(fields))

                    category = yolo_cat_map[int(fields[0])]
                    api_box = ct_utils.convert_yolo_to_xywh(
                        [float(fields[1]), float(fields[2]),
                         float(fields[3]), float(fields[4])])

                    conf = ct_utils.truncate_float(float(fields[5]), precision=4)

                    detections.append({
                        'category': str(category),
                        'conf': conf,
                        'bbox': ct_utils.truncate_float_array(api_box, precision=4)
                    })

                # ...for each line in this results file

        images_entries.append({
            'file': image_fn,
            'detections': detections
        })

    # ...for each image

    ## Save output file

    detector_string = 'converted_from_yolo_format'

    if detector_tag is not None:
        detector_string = detector_tag

    output_content = {
        'info': {
            'detector': detector_string,
            'detector_metadata': {},
            'format_version': '1.3'
        },
        'detection_categories': {
            '1': 'animal',
            '2': 'person',
            '3': 'vehicle'
        },
        'images': images_entries
    }

    with open(output_file,'w') as f:
        json.dump(output_content,f,indent=1)
426
+
427
+ # ...def yolo_txt_output_to_md_output(...)
428
+
429
+
430
#%% Interactive driver

if False:

    pass

    #%%

    # Scratch cell for running the .txt converter by hand; the enclosing
    # "if False" keeps it from executing when the module is imported.
    yolo_labels_dir = os.path.expanduser('~/tmp/model-version-experiments/pt-test-kru/exp/labels')
    source_images_dir = os.path.expanduser('~/data/KRU-test')
    md_results_path = os.path.expanduser('~/data/mdv5a-yolo-pt-kru.json')

    yolo_txt_output_to_md_output(input_results_folder=yolo_labels_dir,
                                 image_folder=source_images_dir,
                                 output_file=md_results_path)
442
+
443
+
444
+ #%% Command-line driver
445
+
446
+ # TODO