megadetector 5.0.28-py3-none-any.whl → 10.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of megadetector has been flagged as potentially problematic.

Files changed (197)
  1. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
  2. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
  3. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
  4. megadetector/classification/aggregate_classifier_probs.py +3 -3
  5. megadetector/classification/analyze_failed_images.py +5 -5
  6. megadetector/classification/cache_batchapi_outputs.py +5 -5
  7. megadetector/classification/create_classification_dataset.py +11 -12
  8. megadetector/classification/crop_detections.py +10 -10
  9. megadetector/classification/csv_to_json.py +8 -8
  10. megadetector/classification/detect_and_crop.py +13 -15
  11. megadetector/classification/efficientnet/model.py +8 -8
  12. megadetector/classification/efficientnet/utils.py +6 -5
  13. megadetector/classification/evaluate_model.py +7 -7
  14. megadetector/classification/identify_mislabeled_candidates.py +6 -6
  15. megadetector/classification/json_to_azcopy_list.py +1 -1
  16. megadetector/classification/json_validator.py +29 -32
  17. megadetector/classification/map_classification_categories.py +9 -9
  18. megadetector/classification/merge_classification_detection_output.py +12 -9
  19. megadetector/classification/prepare_classification_script.py +19 -19
  20. megadetector/classification/prepare_classification_script_mc.py +26 -26
  21. megadetector/classification/run_classifier.py +4 -4
  22. megadetector/classification/save_mislabeled.py +6 -6
  23. megadetector/classification/train_classifier.py +1 -1
  24. megadetector/classification/train_classifier_tf.py +9 -9
  25. megadetector/classification/train_utils.py +10 -10
  26. megadetector/data_management/annotations/annotation_constants.py +1 -2
  27. megadetector/data_management/camtrap_dp_to_coco.py +79 -46
  28. megadetector/data_management/cct_json_utils.py +103 -103
  29. megadetector/data_management/cct_to_md.py +49 -49
  30. megadetector/data_management/cct_to_wi.py +33 -33
  31. megadetector/data_management/coco_to_labelme.py +75 -75
  32. megadetector/data_management/coco_to_yolo.py +210 -193
  33. megadetector/data_management/databases/add_width_and_height_to_db.py +86 -12
  34. megadetector/data_management/databases/combine_coco_camera_traps_files.py +40 -40
  35. megadetector/data_management/databases/integrity_check_json_db.py +228 -200
  36. megadetector/data_management/databases/subset_json_db.py +33 -33
  37. megadetector/data_management/generate_crops_from_cct.py +88 -39
  38. megadetector/data_management/get_image_sizes.py +54 -49
  39. megadetector/data_management/labelme_to_coco.py +133 -125
  40. megadetector/data_management/labelme_to_yolo.py +159 -73
  41. megadetector/data_management/lila/create_lila_blank_set.py +81 -83
  42. megadetector/data_management/lila/create_lila_test_set.py +32 -31
  43. megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
  44. megadetector/data_management/lila/download_lila_subset.py +21 -24
  45. megadetector/data_management/lila/generate_lila_per_image_labels.py +365 -107
  46. megadetector/data_management/lila/get_lila_annotation_counts.py +35 -33
  47. megadetector/data_management/lila/get_lila_image_counts.py +22 -22
  48. megadetector/data_management/lila/lila_common.py +73 -70
  49. megadetector/data_management/lila/test_lila_metadata_urls.py +28 -19
  50. megadetector/data_management/mewc_to_md.py +344 -340
  51. megadetector/data_management/ocr_tools.py +262 -255
  52. megadetector/data_management/read_exif.py +249 -227
  53. megadetector/data_management/remap_coco_categories.py +90 -28
  54. megadetector/data_management/remove_exif.py +81 -21
  55. megadetector/data_management/rename_images.py +187 -187
  56. megadetector/data_management/resize_coco_dataset.py +588 -120
  57. megadetector/data_management/speciesnet_to_md.py +41 -41
  58. megadetector/data_management/wi_download_csv_to_coco.py +55 -55
  59. megadetector/data_management/yolo_output_to_md_output.py +248 -122
  60. megadetector/data_management/yolo_to_coco.py +333 -191
  61. megadetector/detection/change_detection.py +832 -0
  62. megadetector/detection/process_video.py +340 -337
  63. megadetector/detection/pytorch_detector.py +358 -278
  64. megadetector/detection/run_detector.py +399 -186
  65. megadetector/detection/run_detector_batch.py +404 -377
  66. megadetector/detection/run_inference_with_yolov5_val.py +340 -327
  67. megadetector/detection/run_tiled_inference.py +257 -249
  68. megadetector/detection/tf_detector.py +24 -24
  69. megadetector/detection/video_utils.py +332 -295
  70. megadetector/postprocessing/add_max_conf.py +19 -11
  71. megadetector/postprocessing/categorize_detections_by_size.py +45 -45
  72. megadetector/postprocessing/classification_postprocessing.py +468 -433
  73. megadetector/postprocessing/combine_batch_outputs.py +23 -23
  74. megadetector/postprocessing/compare_batch_results.py +590 -525
  75. megadetector/postprocessing/convert_output_format.py +106 -102
  76. megadetector/postprocessing/create_crop_folder.py +347 -147
  77. megadetector/postprocessing/detector_calibration.py +173 -168
  78. megadetector/postprocessing/generate_csv_report.py +508 -499
  79. megadetector/postprocessing/load_api_results.py +48 -27
  80. megadetector/postprocessing/md_to_coco.py +133 -102
  81. megadetector/postprocessing/md_to_labelme.py +107 -90
  82. megadetector/postprocessing/md_to_wi.py +40 -40
  83. megadetector/postprocessing/merge_detections.py +92 -114
  84. megadetector/postprocessing/postprocess_batch_results.py +319 -301
  85. megadetector/postprocessing/remap_detection_categories.py +91 -38
  86. megadetector/postprocessing/render_detection_confusion_matrix.py +214 -205
  87. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
  88. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
  89. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +704 -679
  90. megadetector/postprocessing/separate_detections_into_folders.py +226 -211
  91. megadetector/postprocessing/subset_json_detector_output.py +265 -262
  92. megadetector/postprocessing/top_folders_to_bottom.py +45 -45
  93. megadetector/postprocessing/validate_batch_results.py +70 -70
  94. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
  95. megadetector/taxonomy_mapping/map_new_lila_datasets.py +18 -19
  96. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +54 -33
  97. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +67 -67
  98. megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
  99. megadetector/taxonomy_mapping/simple_image_download.py +8 -8
  100. megadetector/taxonomy_mapping/species_lookup.py +156 -74
  101. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
  102. megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
  103. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
  104. megadetector/utils/ct_utils.py +1049 -211
  105. megadetector/utils/directory_listing.py +21 -77
  106. megadetector/utils/gpu_test.py +22 -22
  107. megadetector/utils/md_tests.py +632 -529
  108. megadetector/utils/path_utils.py +1520 -431
  109. megadetector/utils/process_utils.py +41 -41
  110. megadetector/utils/split_locations_into_train_val.py +62 -62
  111. megadetector/utils/string_utils.py +148 -27
  112. megadetector/utils/url_utils.py +489 -176
  113. megadetector/utils/wi_utils.py +2658 -2526
  114. megadetector/utils/write_html_image_list.py +137 -137
  115. megadetector/visualization/plot_utils.py +34 -30
  116. megadetector/visualization/render_images_with_thumbnails.py +39 -74
  117. megadetector/visualization/visualization_utils.py +487 -435
  118. megadetector/visualization/visualize_db.py +232 -198
  119. megadetector/visualization/visualize_detector_output.py +82 -76
  120. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/METADATA +5 -2
  121. megadetector-10.0.0.dist-info/RECORD +139 -0
  122. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/WHEEL +1 -1
  123. megadetector/api/batch_processing/api_core/__init__.py +0 -0
  124. megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
  125. megadetector/api/batch_processing/api_core/batch_service/score.py +0 -439
  126. megadetector/api/batch_processing/api_core/server.py +0 -294
  127. megadetector/api/batch_processing/api_core/server_api_config.py +0 -97
  128. megadetector/api/batch_processing/api_core/server_app_config.py +0 -55
  129. megadetector/api/batch_processing/api_core/server_batch_job_manager.py +0 -220
  130. megadetector/api/batch_processing/api_core/server_job_status_table.py +0 -149
  131. megadetector/api/batch_processing/api_core/server_orchestration.py +0 -360
  132. megadetector/api/batch_processing/api_core/server_utils.py +0 -88
  133. megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
  134. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
  135. megadetector/api/batch_processing/api_support/__init__.py +0 -0
  136. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +0 -152
  137. megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
  138. megadetector/api/synchronous/__init__.py +0 -0
  139. megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  140. megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +0 -151
  141. megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -263
  142. megadetector/api/synchronous/api_core/animal_detection_api/config.py +0 -35
  143. megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
  144. megadetector/api/synchronous/api_core/tests/load_test.py +0 -110
  145. megadetector/data_management/importers/add_nacti_sizes.py +0 -52
  146. megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
  147. megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
  148. megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
  149. megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
  150. megadetector/data_management/importers/awc_to_json.py +0 -191
  151. megadetector/data_management/importers/bellevue_to_json.py +0 -272
  152. megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
  153. megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
  154. megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
  155. megadetector/data_management/importers/cct_field_adjustments.py +0 -58
  156. megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
  157. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  158. megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
  159. megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
  160. megadetector/data_management/importers/ena24_to_json.py +0 -276
  161. megadetector/data_management/importers/filenames_to_json.py +0 -386
  162. megadetector/data_management/importers/helena_to_cct.py +0 -283
  163. megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
  164. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  165. megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
  166. megadetector/data_management/importers/jb_csv_to_json.py +0 -150
  167. megadetector/data_management/importers/mcgill_to_json.py +0 -250
  168. megadetector/data_management/importers/missouri_to_json.py +0 -490
  169. megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
  170. megadetector/data_management/importers/noaa_seals_2019.py +0 -181
  171. megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
  172. megadetector/data_management/importers/pc_to_json.py +0 -365
  173. megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
  174. megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
  175. megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
  176. megadetector/data_management/importers/rspb_to_json.py +0 -356
  177. megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
  178. megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
  179. megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
  180. megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
  181. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  182. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  183. megadetector/data_management/importers/sulross_get_exif.py +0 -65
  184. megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
  185. megadetector/data_management/importers/ubc_to_json.py +0 -399
  186. megadetector/data_management/importers/umn_to_json.py +0 -507
  187. megadetector/data_management/importers/wellington_to_json.py +0 -263
  188. megadetector/data_management/importers/wi_to_json.py +0 -442
  189. megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
  190. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
  191. megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
  192. megadetector/utils/azure_utils.py +0 -178
  193. megadetector/utils/sas_blob_utils.py +0 -509
  194. megadetector-5.0.28.dist-info/RECORD +0 -209
  195. /megadetector/{api/batch_processing/__init__.py → __init__.py} +0 -0
  196. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/licenses/LICENSE +0 -0
  197. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/top_level.txt +0 -0

megadetector/postprocessing/load_api_results.py (+48 -27)

@@ -18,7 +18,8 @@ Includes functions to read/write the (very very old) .csv results format.
 import json
 import os

-from typing import Dict, Mapping, Optional, Tuple
+from typing import Optional
+from collections.abc import Mapping

 import pandas as pd

@@ -31,28 +32,28 @@ from megadetector.utils.wi_utils import load_md_or_speciesnet_file
 def load_api_results(api_output_path: str, normalize_paths: bool = True,
                      filename_replacements: Optional[Mapping[str, str]] = None,
                      force_forward_slashes: bool = True
-                     ) -> Tuple[pd.DataFrame, Dict]:
+                     ) -> tuple[pd.DataFrame, dict]:
     r"""
     Loads json-formatted MegaDetector results to a Pandas DataFrame.

     Args:
-        api_output_path: path to the output json file
-        normalize_paths: whether to apply os.path.normpath to the 'file' field
-            in each image entry in the output file
-        filename_replacements: replace some path tokens to match local paths to
-            the original blob structure
-        force_forward_slashes: whether to convert backslashes to forward slashes
-            in filenames
+        api_output_path (str): path to the output json file
+        normalize_paths (bool, optional): whether to apply os.path.normpath to the 'file'
+            field in each image entry in the output file
+        filename_replacements (dict, optional): replace some path tokens to match local paths
+            to the original file structure
+        force_forward_slashes (bool, optional): whether to convert backslashes to forward
+            slashes in filenames

     Returns:
         detection_results: pd.DataFrame, contains at least the columns ['file', 'detections','failure']
         other_fields: a dict containing fields in the results other than 'images'
     """
-
+
     print('Loading results from {}'.format(api_output_path))

     detection_results = load_md_or_speciesnet_file(api_output_path)
-
+
     # Validate that this is really a detector output file
     for s in ['info', 'detection_categories', 'images']:
         assert s in detection_results, 'Missing field {} in detection results'.format(s)
@@ -65,12 +66,12 @@ def load_api_results(api_output_path: str, normalize_paths: bool = True,

     if normalize_paths:
         for image in detection_results['images']:
-            image['file'] = os.path.normpath(image['file'])
+            image['file'] = os.path.normpath(image['file'])

     if force_forward_slashes:
         for image in detection_results['images']:
             image['file'] = image['file'].replace('\\','/')
-
+
     # Replace some path tokens to match local paths to original blob structure
     if filename_replacements is not None:
         for string_to_replace in filename_replacements.keys():
@@ -79,16 +80,16 @@ def load_api_results(api_output_path: str, normalize_paths: bool = True,
                 im['file'] = im['file'].replace(string_to_replace,replacement_string)

     print('Converting results to dataframe')
-
+
     # If this is a newer file that doesn't include maximum detection confidence values,
     # add them, because our unofficial internal dataframe format includes this.
     for im in detection_results['images']:
         if 'max_detection_conf' not in im:
             im['max_detection_conf'] = ct_utils.get_max_conf(im)
-
+
     # Pack the json output into a Pandas DataFrame
     detection_results = pd.DataFrame(detection_results['images'])
-
+
     print('Finished loading MegaDetector results for {} images from {}'.format(
         len(detection_results),api_output_path))

@@ -98,6 +99,11 @@
 def write_api_results(detection_results_table, other_fields, out_path):
     """
     Writes a Pandas DataFrame to the MegaDetector .json format.
+
+    Args:
+        detection_results_table (DataFrame): data to write
+        other_fields (dict): additional fields to include in the output .json
+        out_path (str): output .json filename
     """

     print('Writing detection results to {}'.format(out_path))
@@ -111,7 +117,7 @@ def write_api_results(detection_results_table, other_fields, out_path):
         if 'failure' in im and im['failure'] is None:
             del im['failure']
     fields['images'] = images
-
+
     # Convert the 'version' field back to a string as per format convention
     try:
         version = other_fields['info']['format_version']
@@ -120,7 +126,7 @@ def write_api_results(detection_results_table, other_fields, out_path):
     except Exception:
         print('Warning: error determining format version')
         pass
-
+
     # Remove 'max_detection_conf' as per newer file convention (format >= v1.3)
     try:
         version = other_fields['info']['format_version']
@@ -132,20 +138,31 @@ def write_api_results(detection_results_table, other_fields, out_path):
     except Exception:
         print('Warning: error removing max_detection_conf from output')
         pass
-
+
     with open(out_path, 'w') as f:
         json.dump(fields, f, indent=1)

     print('Finished writing detection results to {}'.format(out_path))


-def load_api_results_csv(filename, normalize_paths=True, filename_replacements={}, nrows=None):
+def load_api_results_csv(filename, normalize_paths=True, filename_replacements=None, nrows=None):
     """
     [DEPRECATED]
-
+
     Loads .csv-formatted MegaDetector results to a pandas table
+
+    Args:
+        filename (str): path to the csv file to read
+        normalize_paths (bool, optional): whether to apply os.path.normpath to the 'file'
+            field in each image entry in the output file
+        filename_replacements (dict, optional): replace some path tokens to match local paths
+            to the original file structure
+        nrows (int, optional): read only the first N rows of [filename]
     """

+    if filename_replacements is None:
+        filename_replacements = {}
+
     print('Loading MegaDetector results from {}'.format(filename))

     detection_results = pd.read_csv(filename,nrows=nrows)
@@ -169,12 +186,12 @@ def load_api_results_csv(filename, normalize_paths=True, filename_replacements={

         replacement_string = filename_replacements[string_to_replace]

-        # iRow = 0
-        for iRow in range(0,len(detection_results)):
-            row = detection_results.iloc[iRow]
+        # i_row = 0
+        for i_row in range(0,len(detection_results)):
+            row = detection_results.iloc[i_row]
             fn = row['image_path']
             fn = fn.replace(string_to_replace,replacement_string)
-            detection_results.at[iRow,'image_path'] = fn
+            detection_results.at[i_row,'image_path'] = fn

     print('Finished loading and de-serializing MD results for {} images from {}'.format(
         len(detection_results),filename))
@@ -183,12 +200,16 @@ def load_api_results_csv(filename, normalize_paths=True, filename_replacements={


 def write_api_results_csv(detection_results, filename):
-    """
+    """
     [DEPRECATED]
-
+
     Writes a Pandas table to csv in a way that's compatible with the .csv output
     format. Currently just a wrapper around to_csv that forces output writing
     to go through a common code path.
+
+    Args:
+        detection_results (DataFrame): dataframe to write to [filename]
+        filename (str): .csv filename to write
     """

     print('Writing detection results to {}'.format(filename))
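
For orientation, here is a minimal usage sketch of the updated load_api_results/write_api_results API shown above; the results filename and the replacement mapping are hypothetical example values, not values from this release:

from megadetector.postprocessing.load_api_results import load_api_results, write_api_results

# Hypothetical MD-formatted results file
results_file = 'md-results.json'

# Returns a DataFrame with at least the columns ['file', 'detections', 'failure'],
# plus a dict holding the non-image fields ('info', 'detection_categories', ...)
detection_results, other_fields = load_api_results(
    results_file,
    normalize_paths=True,
    filename_replacements={'blob/prefix': 'local/prefix'},  # hypothetical token mapping
    force_forward_slashes=True)

# ...edit the DataFrame as needed, then write it back out in the MD .json format
write_api_results(detection_results, other_fields, 'md-results-edited.json')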

megadetector/postprocessing/md_to_coco.py (+133 -102)

@@ -3,7 +3,7 @@
 md_to_coco.py

 "Converts" MegaDetector output files to COCO format. "Converts" is in quotes because
-this is an opinionated transformation that requires a confidence threshold for most
+this is an opinionated transformation that requires a confidence threshold for most
 applications.

 Does not currently handle classification information.
@@ -15,6 +15,8 @@ Does not currently handle classification information.
 import os
 import json
 import uuid
+import sys
+import argparse

 from tqdm import tqdm

@@ -36,44 +38,45 @@ def md_to_coco(md_results_file,
                include_failed_images=True,
                include_annotations_without_bounding_boxes=True,
                empty_category_id='0',
-               overwrite_behavior='skip',
+               overwrite_behavior='skip',
                verbose=True,
-               image_filename_to_size=None):
+               image_filename_to_size=None,
+               unrecognized_category_handling='error'):
     """
     "Converts" MegaDetector output files to COCO format. "Converts" is in quotes because
     this is an opinionated transformation that typically requires a confidence threshold.
-
-    The default confidence threshold is not 0; the assumption is that by default, you are
+
+    The default confidence threshold is not 0; the assumption is that by default, you are
     going to treat the resulting COCO file as a set of labels. If you are using the resulting COCO
-    file to *evaluate* a detector, rather than as a set of labels, you likely want a
-    confidence threshold of 0. Confidence values will be written to the semi-standard "score"
+    file to *evaluate* a detector, rather than as a set of labels, you likely want a
+    confidence threshold of 0. Confidence values will be written to the semi-standard "score"
     field for each image (regardless of the threshold) if preserve_nonstandard_metadata is True.
-
-    A folder of images is required if width and height information are not available
+
+    A folder of images is required if width and height information are not available
     in the MD results file.

     Args:
-        md_results_file (str): MD results .json file to convert to COCO
+        md_results_file (str): MD results .json file to convert to COCO
            format
-        coco_output_file (str, optional): COCO .json file to write; if this is None, we'll return
+        coco_output_file (str, optional): COCO .json file to write; if this is None, we'll return
            a COCO-formatted dict, but won't write it to disk. If this is 'auto', we'll write to
            [md_results_file_without_extension].coco.json.
        image_folder (str, optional): folder of images, required if 'width' and 'height' are not
            present in the MD results file (they are not required by the format)
        confidence_threshold (float, optional): boxes below this confidence threshold will not be
            included in the output data
-        validate_image_sizes (bool, optional): if this is True, we'll check the image sizes
+        validate_image_sizes (bool, optional): if this is True, we'll check the image sizes
            regardless of whether "width" and "height" are present in the MD results file.
        info (dict, optional): arbitrary metadata to include in an "info" field in the COCO-formatted
            output
-        preserve_nonstandard_metadata (bool, optional): if this is True, confidence will be preserved in a
-            non-standard "score" field in each annotation, and any random fields present in each image's
-            data (e.g. EXIF metadata) will be propagated to COCO output
-        include_failed_images (bool, optional): if this is True, failed images will be propagated to COCO output
+        preserve_nonstandard_metadata (bool, optional): if this is True, confidence will be preserved in a
+            non-standard "score" field in each annotation, and any random fields present in each image's
+            data (e.g. EXIF metadata) will be propagated to COCO output
+        include_failed_images (bool, optional): if this is True, failed images will be propagated to COCO output
            with a non-empty "failure" field and no other fields, otherwise failed images will be skipped.
-        include_annotations_without_bounding_boxes (bool, optional): if this is True, annotations with
-            only class labels (no bounding boxes) will be included in the output. If this is False, empty
-            images will be represented with no annotations.
+        include_annotations_without_bounding_boxes (bool, optional): the only time we end up with
+            annotations without bounding boxes is when a detection has the category [empty_category_id];
+            this determines whether those annotations are included in the output.
        empty_category_id (str, optional): category ID reserved for the 'empty' class, should not be
            attached to any bounding boxes
        overwrite_behavior (str, optional): determines behavior if the output file exists ('skip' to skip conversion,
@@ -84,19 +87,26 @@ def md_to_coco(md_results_file,
            image sizes is the slowest step, so if you need to convert many results files at once for the same
            set of images, things will be gobs faster if you read the image sizes in advance and pass them in
            via this argument. The format used here is the same format output by parallel_get_image_sizes().
-
+        unrecognized_category_handling (str or float, optional): specifies what to do when encountering category
+            IDs not in the category mapping. Can be "error", "ignore", or "warning". Can also be a float,
+            in which case an error is thrown if an unrecognized category has a confidence value higher than
+            this value.
+
     Returns:
         dict: the COCO data dict, identical to what's written to [coco_output_file] if [coco_output_file]
         is not None.
     """
-
+
     assert isinstance(md_results_file,str)
     assert os.path.isfile(md_results_file), \
         'MD results file {} does not exist'.format(md_results_file)
+    assert (isinstance(unrecognized_category_handling,float)) or \
+        (unrecognized_category_handling in ('error','warning','ignore')), \
+        'Invalid category handling behavior {}'.format(unrecognized_category_handling)

     if coco_output_file == 'auto':
         coco_output_file = insert_before_extension(md_results_file,'coco')
-
+
     if coco_output_file is not None:
         if os.path.isfile(coco_output_file):
             if overwrite_behavior == 'skip':
@@ -120,155 +130,177 @@
                 pass
             elif overwrite_behavior == 'error':
                 raise ValueError('Output file {} exists'.format(coco_output_file))
-
-    with open(md_results_file,'r') as f:
-        md_results = json.load(f)
-
+
+    with open(md_results_file,'r') as f:
+        md_results = json.load(f)
+
     coco_images = []
     coco_annotations = []
-
-    print('Converting MD results file {} to COCO file {}...'.format(
-        md_results_file, coco_output_file))
-
+
+    if verbose:
+        print('Converting MD results file {} to COCO file {}...'.format(
+            md_results_file, coco_output_file))
+
     # im = md_results['images'][0]
     for im in tqdm(md_results['images'],disable=(not verbose)):
-
+
         coco_im = {}
         coco_im['id'] = im['file']
         coco_im['file_name'] = im['file']
-
-        # There is no concept of this in the COCO standard
+
+        # There is no concept of this in the COCO standard
         if 'failure' in im and im['failure'] is not None:
             if include_failed_images:
                 coco_im['failure'] = im['failure']
             coco_images.append(coco_im)
             continue
-
+
         # Read/validate image size
         w = None
         h = None
-
+
         if ('width' not in im) or ('height' not in im) or validate_image_sizes:
             if (image_folder is None) and (image_filename_to_size is None):
-                raise ValueError('Must provide an image folder or a size mapping when height/width need to be read from images')
-
+                raise ValueError('Must provide an image folder or a size mapping when ' + \
+                                 'height/width need to be read from images')
+
             w = None; h = None
-
+
             if image_filename_to_size is not None:
-
+
                 if im['file'] not in image_filename_to_size:
-                    print('Warning: file {} not in image size mapping dict, reading from file'.format(im['file']))
+                    print('Warning: file {} not in image size mapping dict, reading from file'.format(
+                        im['file']))
                 else:
                     image_size = image_filename_to_size[im['file']]
                     if image_size is not None:
                         assert len(image_size) == 2
                         w = image_size[0]
                         h = image_size[1]
-
+
             if w is None:
-
+
                 image_file_abs = os.path.join(image_folder,im['file'])
                 pil_im = vis_utils.open_image(image_file_abs)
                 w = pil_im.width
                 h = pil_im.height
-
+
             if validate_image_sizes:
                 if 'width' in im:
                     assert im['width'] == w, 'Width mismatch for image {}'.format(im['file'])
                 if 'height' in im:
                     assert im['height'] == h, 'Height mismatch for image {}'.format(im['file'])
         else:
-
+
             w = im['width']
             h = im['height']
-
+
         coco_im['width'] = w
         coco_im['height'] = h
-
+
         # Add other, non-standard fields to the output dict
         if preserve_nonstandard_metadata:
             for k in im.keys():
                 if k not in ('file','detections','width','height'):
                     coco_im[k] = im[k]
-
+
         coco_images.append(coco_im)
-
+
         # detection = im['detections'][0]
         for detection in im['detections']:
-
+
             # Skip below-threshold detections
             if confidence_threshold is not None and detection['conf'] < confidence_threshold:
                 continue
-
+
             # Create an annotation
-            ann = {}
+            ann = {}
             ann['id'] = str(uuid.uuid1())
-            ann['image_id'] = coco_im['id']
-
+            ann['image_id'] = coco_im['id']
+
             md_category_id = detection['category']
+
+            if md_category_id not in md_results['detection_categories']:
+
+                s = 'unrecognized category ID {} occurred with confidence {} in file {}'.format(
+                    md_category_id,detection['conf'],im['file'])
+                if isinstance(unrecognized_category_handling,float):
+                    if detection['conf'] > unrecognized_category_handling:
+                        raise ValueError(s)
+                    else:
+                        continue
+                elif unrecognized_category_handling == 'warning':
+                    print('Warning: {}'.format(s))
+                    continue
+                elif unrecognized_category_handling == 'ignore':
+                    continue
+                else:
+                    raise ValueError(s)
+
             coco_category_id = int(md_category_id)
             ann['category_id'] = coco_category_id
-
+
             if md_category_id != empty_category_id:
-
+
                 assert 'bbox' in detection,\
                     'Oops: non-empty category with no bbox in {}'.format(im['file'])
-
+
                 ann['bbox'] = detection['bbox']
-
+
                 # MegaDetector: [x,y,width,height] (normalized, origin upper-left)
                 # COCO: [x,y,width,height] (absolute, origin upper-left)
                 ann['bbox'][0] = ann['bbox'][0] * coco_im['width']
                 ann['bbox'][1] = ann['bbox'][1] * coco_im['height']
                 ann['bbox'][2] = ann['bbox'][2] * coco_im['width']
-                ann['bbox'][3] = ann['bbox'][3] * coco_im['height']
-
+                ann['bbox'][3] = ann['bbox'][3] * coco_im['height']
+
             else:
-
+
                 # In very esoteric cases, we use the empty category (0) in MD-formatted output files
-                print('Warning: empty category ({}) used for annotation in file {}'.format(
+                print('Warning: empty category ({}) used for annotation for image {}'.format(
                     empty_category_id,im['file']))
                 pass
-
+
             if preserve_nonstandard_metadata:
                 # "Score" is a semi-standard string here, recognized by at least pycocotools
                 # ann['conf'] = detection['conf']
                 ann['score'] = detection['conf']
-
+
             if 'bbox' in ann or include_annotations_without_bounding_boxes:
-                coco_annotations.append(ann)
-
+                coco_annotations.append(ann)
+
         # ...for each detection
-
+
     # ...for each image

     output_dict = {}
-
+
     if info is not None:
         output_dict['info'] = info
     else:
         output_dict['info'] = {'description':'Converted from MD results file {}'.format(md_results_file)}
     output_dict['info']['confidence_threshold'] = confidence_threshold
-
+
     output_dict['images'] = coco_images
     output_dict['annotations'] = coco_annotations
-
+
     output_dict['categories'] = []
-
+
     for md_category_id in md_results['detection_categories'].keys():
-
+
         coco_category_id = int(md_category_id)
         coco_category = {'id':coco_category_id,
                          'name':md_results['detection_categories'][md_category_id]}
         output_dict['categories'].append(coco_category)
-
-    print('Writing COCO output file...')
-
+
+    if verbose:
+        print('Writing COCO output file...')
+
     if coco_output_file is not None:
         with open(coco_output_file,'w') as f:
             json.dump(output_dict,f,indent=1)
-
-    return output_dict
+
+    return output_dict

 # ...def md_to_coco(...)

@@ -276,11 +308,11 @@
 #%% Interactive driver

 if False:
-
+
     pass

     #%% Configure options
-
+
     md_results_file = os.path.expanduser('~/data/md-test.json')
     coco_output_file = os.path.expanduser('~/data/md-test-coco.json')
     image_folder = os.path.expanduser('~/data/md-test')
@@ -290,10 +322,10 @@ if False:
     info=None
     preserve_nonstandard_metadata=True
     include_failed_images=False
-
-
+
+
     #%% Programmatic execution
-
+
     output_dict = md_to_coco(md_results_file,
                              coco_output_file=coco_output_file,
                              image_folder=image_folder,
@@ -302,10 +334,10 @@ if False:
                              info=info,
                              preserve_nonstandard_metadata=preserve_nonstandard_metadata,
                              include_failed_images=include_failed_images)
-
-
+
+
     #%% Command-line example
-
+
     s = f'python md_to_coco.py {md_results_file} {coco_output_file} {confidence_threshold} '
     if image_folder is not None:
         s += f' --image_folder {image_folder}'
@@ -316,9 +348,9 @@

     print(s); import clipboard; clipboard.copy(s)

-
+
     #%% Preview the resulting file
-
+
     from megadetector.visualization import visualize_db
     options = visualize_db.DbVizOptions()
     options.parallelize_rendering = True
@@ -327,49 +359,48 @@ if False:

     html_file,_ = visualize_db.visualize_db(coco_output_file,
                                             os.path.expanduser('~/tmp/md_to_coco_preview'),
-                                            image_folder,options)
+                                            image_folder,options)

     from megadetector.utils import path_utils # noqa
     path_utils.open_file(html_file)
-
-
-#%% Command-line driver

-import sys,argparse

-def main():
+#%% Command-line driver
+
+def main(): # noqa

     parser = argparse.ArgumentParser(
-        description='"Convert" MD output to COCO format, in quotes because this is an opinionated transformation that requires a confidence threshold')
-
+        description='"Convert" MD output to COCO format, in quotes because this is an opinionated ' + \
+            'transformation that requires a confidence threshold')
+
     parser.add_argument(
         'md_results_file',
         type=str,
         help='Path to MD results file (.json)')
-
+
     parser.add_argument(
         'coco_output_file',
         type=str,
         help='Output filename (.json)')
-
+
     parser.add_argument(
         'confidence_threshold',
         type=float,
         default=default_confidence_threshold,
         help='Confidence threshold (default {})'.format(default_confidence_threshold)
        )
-
+
     parser.add_argument(
         '--image_folder',
         type=str,
         default=None,
         help='Image folder, only required if we will need to access image sizes'
        )
-
+
     parser.add_argument(
         '--preserve_nonstandard_metadata',
         action='store_true',
-        help='Preserve metadata that isn\'t normally included in ' +
+        help='Preserve metadata that isn\'t normally included in ' +
            'COCO-formatted data (e.g. EXIF metadata, confidence values)'
        )

@@ -378,7 +409,7 @@ def main():
         action='store_true',
         help='Keep a record of corrupted images in the output; may not be completely COCO-compliant'
        )
-
+
     if len(sys.argv[1:]) == 0:
         parser.print_help()
         parser.exit()
@@ -392,7 +423,7 @@ def main():
                validate_image_sizes=False,
                info=None,
                preserve_nonstandard_metadata=args.preserve_nonstandard_metadata,
-               include_failed_images=args.include_failed_images)
-
+               include_failed_images=args.include_failed_images)
+
 if __name__ == '__main__':
     main()
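
As a rough sketch of calling the updated md_to_coco() programmatically, including the new unrecognized_category_handling option documented above; the paths and the threshold are hypothetical example values:

import os
from megadetector.postprocessing.md_to_coco import md_to_coco

# Hypothetical MD results file and matching image folder
md_results_file = os.path.expanduser('~/data/md-test.json')
image_folder = os.path.expanduser('~/data/md-test')

# 'auto' writes to [md_results_file without extension].coco.json;
# unrecognized_category_handling may be 'error', 'warning', 'ignore', or a float
# confidence value above which an unrecognized category ID raises an error.
coco_dict = md_to_coco(md_results_file,
                       coco_output_file='auto',
                       image_folder=image_folder,
                       confidence_threshold=0.2,
                       unrecognized_category_handling='warning')

print('Converted {} images and {} annotations'.format(
    len(coco_dict['images']), len(coco_dict['annotations'])))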