megadetector 5.0.28__py3-none-any.whl → 5.0.29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic; further details were linked from the original page.

Files changed (176):
  1. megadetector/api/batch_processing/api_core/batch_service/score.py +4 -5
  2. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +1 -1
  3. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +1 -1
  4. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
  5. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
  6. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
  7. megadetector/api/synchronous/api_core/tests/load_test.py +2 -3
  8. megadetector/classification/aggregate_classifier_probs.py +3 -3
  9. megadetector/classification/analyze_failed_images.py +5 -5
  10. megadetector/classification/cache_batchapi_outputs.py +5 -5
  11. megadetector/classification/create_classification_dataset.py +11 -12
  12. megadetector/classification/crop_detections.py +10 -10
  13. megadetector/classification/csv_to_json.py +8 -8
  14. megadetector/classification/detect_and_crop.py +13 -15
  15. megadetector/classification/evaluate_model.py +7 -7
  16. megadetector/classification/identify_mislabeled_candidates.py +6 -6
  17. megadetector/classification/json_to_azcopy_list.py +1 -1
  18. megadetector/classification/json_validator.py +29 -32
  19. megadetector/classification/map_classification_categories.py +9 -9
  20. megadetector/classification/merge_classification_detection_output.py +12 -9
  21. megadetector/classification/prepare_classification_script.py +19 -19
  22. megadetector/classification/prepare_classification_script_mc.py +23 -23
  23. megadetector/classification/run_classifier.py +4 -4
  24. megadetector/classification/save_mislabeled.py +6 -6
  25. megadetector/classification/train_classifier.py +1 -1
  26. megadetector/classification/train_classifier_tf.py +9 -9
  27. megadetector/classification/train_utils.py +10 -10
  28. megadetector/data_management/annotations/annotation_constants.py +1 -1
  29. megadetector/data_management/camtrap_dp_to_coco.py +45 -45
  30. megadetector/data_management/cct_json_utils.py +101 -101
  31. megadetector/data_management/cct_to_md.py +49 -49
  32. megadetector/data_management/cct_to_wi.py +33 -33
  33. megadetector/data_management/coco_to_labelme.py +75 -75
  34. megadetector/data_management/coco_to_yolo.py +189 -189
  35. megadetector/data_management/databases/add_width_and_height_to_db.py +3 -2
  36. megadetector/data_management/databases/combine_coco_camera_traps_files.py +38 -38
  37. megadetector/data_management/databases/integrity_check_json_db.py +202 -188
  38. megadetector/data_management/databases/subset_json_db.py +33 -33
  39. megadetector/data_management/generate_crops_from_cct.py +38 -38
  40. megadetector/data_management/get_image_sizes.py +54 -49
  41. megadetector/data_management/labelme_to_coco.py +130 -124
  42. megadetector/data_management/labelme_to_yolo.py +78 -72
  43. megadetector/data_management/lila/create_lila_blank_set.py +81 -83
  44. megadetector/data_management/lila/create_lila_test_set.py +32 -31
  45. megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
  46. megadetector/data_management/lila/download_lila_subset.py +21 -24
  47. megadetector/data_management/lila/generate_lila_per_image_labels.py +91 -91
  48. megadetector/data_management/lila/get_lila_annotation_counts.py +30 -30
  49. megadetector/data_management/lila/get_lila_image_counts.py +22 -22
  50. megadetector/data_management/lila/lila_common.py +70 -70
  51. megadetector/data_management/lila/test_lila_metadata_urls.py +13 -14
  52. megadetector/data_management/mewc_to_md.py +339 -340
  53. megadetector/data_management/ocr_tools.py +258 -252
  54. megadetector/data_management/read_exif.py +231 -224
  55. megadetector/data_management/remap_coco_categories.py +26 -26
  56. megadetector/data_management/remove_exif.py +31 -20
  57. megadetector/data_management/rename_images.py +187 -187
  58. megadetector/data_management/resize_coco_dataset.py +41 -41
  59. megadetector/data_management/speciesnet_to_md.py +41 -41
  60. megadetector/data_management/wi_download_csv_to_coco.py +55 -55
  61. megadetector/data_management/yolo_output_to_md_output.py +117 -120
  62. megadetector/data_management/yolo_to_coco.py +195 -188
  63. megadetector/detection/change_detection.py +831 -0
  64. megadetector/detection/process_video.py +340 -337
  65. megadetector/detection/pytorch_detector.py +304 -262
  66. megadetector/detection/run_detector.py +177 -164
  67. megadetector/detection/run_detector_batch.py +364 -363
  68. megadetector/detection/run_inference_with_yolov5_val.py +328 -325
  69. megadetector/detection/run_tiled_inference.py +256 -249
  70. megadetector/detection/tf_detector.py +24 -24
  71. megadetector/detection/video_utils.py +290 -282
  72. megadetector/postprocessing/add_max_conf.py +15 -11
  73. megadetector/postprocessing/categorize_detections_by_size.py +44 -44
  74. megadetector/postprocessing/classification_postprocessing.py +415 -415
  75. megadetector/postprocessing/combine_batch_outputs.py +20 -21
  76. megadetector/postprocessing/compare_batch_results.py +528 -517
  77. megadetector/postprocessing/convert_output_format.py +97 -97
  78. megadetector/postprocessing/create_crop_folder.py +219 -146
  79. megadetector/postprocessing/detector_calibration.py +173 -168
  80. megadetector/postprocessing/generate_csv_report.py +508 -499
  81. megadetector/postprocessing/load_api_results.py +23 -20
  82. megadetector/postprocessing/md_to_coco.py +129 -98
  83. megadetector/postprocessing/md_to_labelme.py +89 -83
  84. megadetector/postprocessing/md_to_wi.py +40 -40
  85. megadetector/postprocessing/merge_detections.py +87 -114
  86. megadetector/postprocessing/postprocess_batch_results.py +313 -298
  87. megadetector/postprocessing/remap_detection_categories.py +36 -36
  88. megadetector/postprocessing/render_detection_confusion_matrix.py +205 -199
  89. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
  90. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
  91. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +702 -677
  92. megadetector/postprocessing/separate_detections_into_folders.py +226 -211
  93. megadetector/postprocessing/subset_json_detector_output.py +265 -262
  94. megadetector/postprocessing/top_folders_to_bottom.py +45 -45
  95. megadetector/postprocessing/validate_batch_results.py +70 -70
  96. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
  97. megadetector/taxonomy_mapping/map_new_lila_datasets.py +15 -15
  98. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +14 -14
  99. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +66 -66
  100. megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
  101. megadetector/taxonomy_mapping/simple_image_download.py +8 -8
  102. megadetector/taxonomy_mapping/species_lookup.py +33 -33
  103. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
  104. megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
  105. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
  106. megadetector/utils/azure_utils.py +22 -22
  107. megadetector/utils/ct_utils.py +1018 -200
  108. megadetector/utils/directory_listing.py +21 -77
  109. megadetector/utils/gpu_test.py +22 -22
  110. megadetector/utils/md_tests.py +541 -518
  111. megadetector/utils/path_utils.py +1457 -398
  112. megadetector/utils/process_utils.py +41 -41
  113. megadetector/utils/sas_blob_utils.py +53 -49
  114. megadetector/utils/split_locations_into_train_val.py +61 -61
  115. megadetector/utils/string_utils.py +147 -26
  116. megadetector/utils/url_utils.py +463 -173
  117. megadetector/utils/wi_utils.py +2629 -2526
  118. megadetector/utils/write_html_image_list.py +137 -137
  119. megadetector/visualization/plot_utils.py +21 -21
  120. megadetector/visualization/render_images_with_thumbnails.py +37 -73
  121. megadetector/visualization/visualization_utils.py +401 -397
  122. megadetector/visualization/visualize_db.py +197 -190
  123. megadetector/visualization/visualize_detector_output.py +79 -73
  124. {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/METADATA +135 -132
  125. megadetector-5.0.29.dist-info/RECORD +163 -0
  126. {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/WHEEL +1 -1
  127. {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/licenses/LICENSE +0 -0
  128. {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/top_level.txt +0 -0
  129. megadetector/data_management/importers/add_nacti_sizes.py +0 -52
  130. megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
  131. megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
  132. megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
  133. megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
  134. megadetector/data_management/importers/awc_to_json.py +0 -191
  135. megadetector/data_management/importers/bellevue_to_json.py +0 -272
  136. megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
  137. megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
  138. megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
  139. megadetector/data_management/importers/cct_field_adjustments.py +0 -58
  140. megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
  141. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  142. megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
  143. megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
  144. megadetector/data_management/importers/ena24_to_json.py +0 -276
  145. megadetector/data_management/importers/filenames_to_json.py +0 -386
  146. megadetector/data_management/importers/helena_to_cct.py +0 -283
  147. megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
  148. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  149. megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
  150. megadetector/data_management/importers/jb_csv_to_json.py +0 -150
  151. megadetector/data_management/importers/mcgill_to_json.py +0 -250
  152. megadetector/data_management/importers/missouri_to_json.py +0 -490
  153. megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
  154. megadetector/data_management/importers/noaa_seals_2019.py +0 -181
  155. megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
  156. megadetector/data_management/importers/pc_to_json.py +0 -365
  157. megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
  158. megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
  159. megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
  160. megadetector/data_management/importers/rspb_to_json.py +0 -356
  161. megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
  162. megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
  163. megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
  164. megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
  165. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  166. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  167. megadetector/data_management/importers/sulross_get_exif.py +0 -65
  168. megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
  169. megadetector/data_management/importers/ubc_to_json.py +0 -399
  170. megadetector/data_management/importers/umn_to_json.py +0 -507
  171. megadetector/data_management/importers/wellington_to_json.py +0 -263
  172. megadetector/data_management/importers/wi_to_json.py +0 -442
  173. megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
  174. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
  175. megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
  176. megadetector-5.0.28.dist-info/RECORD +0 -209
@@ -18,7 +18,7 @@ Includes functions to read/write the (very very old) .csv results format.
18
18
  import json
19
19
  import os
20
20
 
21
- from typing import Dict, Mapping, Optional, Tuple
21
+ from typing import Mapping, Optional
22
22
 
23
23
  import pandas as pd
24
24
 
@@ -31,7 +31,7 @@ from megadetector.utils.wi_utils import load_md_or_speciesnet_file
31
31
  def load_api_results(api_output_path: str, normalize_paths: bool = True,
32
32
  filename_replacements: Optional[Mapping[str, str]] = None,
33
33
  force_forward_slashes: bool = True
34
- ) -> Tuple[pd.DataFrame, Dict]:
34
+ ) -> tuple[pd.DataFrame, dict]:
35
35
  r"""
36
36
  Loads json-formatted MegaDetector results to a Pandas DataFrame.
37
37
 
@@ -48,11 +48,11 @@ def load_api_results(api_output_path: str, normalize_paths: bool = True,
48
48
  detection_results: pd.DataFrame, contains at least the columns ['file', 'detections','failure']
49
49
  other_fields: a dict containing fields in the results other than 'images'
50
50
  """
51
-
51
+
52
52
  print('Loading results from {}'.format(api_output_path))
53
53
 
54
54
  detection_results = load_md_or_speciesnet_file(api_output_path)
55
-
55
+
56
56
  # Validate that this is really a detector output file
57
57
  for s in ['info', 'detection_categories', 'images']:
58
58
  assert s in detection_results, 'Missing field {} in detection results'.format(s)
@@ -65,12 +65,12 @@ def load_api_results(api_output_path: str, normalize_paths: bool = True,
65
65
 
66
66
  if normalize_paths:
67
67
  for image in detection_results['images']:
68
- image['file'] = os.path.normpath(image['file'])
68
+ image['file'] = os.path.normpath(image['file'])
69
69
 
70
70
  if force_forward_slashes:
71
71
  for image in detection_results['images']:
72
72
  image['file'] = image['file'].replace('\\','/')
73
-
73
+
74
74
  # Replace some path tokens to match local paths to original blob structure
75
75
  if filename_replacements is not None:
76
76
  for string_to_replace in filename_replacements.keys():
@@ -79,16 +79,16 @@ def load_api_results(api_output_path: str, normalize_paths: bool = True,
79
79
  im['file'] = im['file'].replace(string_to_replace,replacement_string)
80
80
 
81
81
  print('Converting results to dataframe')
82
-
82
+
83
83
  # If this is a newer file that doesn't include maximum detection confidence values,
84
84
  # add them, because our unofficial internal dataframe format includes this.
85
85
  for im in detection_results['images']:
86
86
  if 'max_detection_conf' not in im:
87
87
  im['max_detection_conf'] = ct_utils.get_max_conf(im)
88
-
88
+
89
89
  # Pack the json output into a Pandas DataFrame
90
90
  detection_results = pd.DataFrame(detection_results['images'])
91
-
91
+
92
92
  print('Finished loading MegaDetector results for {} images from {}'.format(
93
93
  len(detection_results),api_output_path))
94
94
 
@@ -111,7 +111,7 @@ def write_api_results(detection_results_table, other_fields, out_path):
111
111
  if 'failure' in im and im['failure'] is None:
112
112
  del im['failure']
113
113
  fields['images'] = images
114
-
114
+
115
115
  # Convert the 'version' field back to a string as per format convention
116
116
  try:
117
117
  version = other_fields['info']['format_version']
@@ -120,7 +120,7 @@ def write_api_results(detection_results_table, other_fields, out_path):
120
120
  except Exception:
121
121
  print('Warning: error determining format version')
122
122
  pass
123
-
123
+
124
124
  # Remove 'max_detection_conf' as per newer file convention (format >= v1.3)
125
125
  try:
126
126
  version = other_fields['info']['format_version']
@@ -132,20 +132,23 @@ def write_api_results(detection_results_table, other_fields, out_path):
132
132
  except Exception:
133
133
  print('Warning: error removing max_detection_conf from output')
134
134
  pass
135
-
135
+
136
136
  with open(out_path, 'w') as f:
137
137
  json.dump(fields, f, indent=1)
138
138
 
139
139
  print('Finished writing detection results to {}'.format(out_path))
140
140
 
141
141
 
142
- def load_api_results_csv(filename, normalize_paths=True, filename_replacements={}, nrows=None):
142
+ def load_api_results_csv(filename, normalize_paths=True, filename_replacements=None, nrows=None):
143
143
  """
144
144
  [DEPRECATED]
145
-
145
+
146
146
  Loads .csv-formatted MegaDetector results to a pandas table
147
147
  """
148
148
 
149
+ if filename_replacements is None:
150
+ filename_replacements = {}
151
+
149
152
  print('Loading MegaDetector results from {}'.format(filename))
150
153
 
151
154
  detection_results = pd.read_csv(filename,nrows=nrows)
@@ -169,12 +172,12 @@ def load_api_results_csv(filename, normalize_paths=True, filename_replacements={
169
172
 
170
173
  replacement_string = filename_replacements[string_to_replace]
171
174
 
172
- # iRow = 0
173
- for iRow in range(0,len(detection_results)):
174
- row = detection_results.iloc[iRow]
175
+ # i_row = 0
176
+ for i_row in range(0,len(detection_results)):
177
+ row = detection_results.iloc[i_row]
175
178
  fn = row['image_path']
176
179
  fn = fn.replace(string_to_replace,replacement_string)
177
- detection_results.at[iRow,'image_path'] = fn
180
+ detection_results.at[i_row,'image_path'] = fn
178
181
 
179
182
  print('Finished loading and de-serializing MD results for {} images from {}'.format(
180
183
  len(detection_results),filename))
@@ -183,9 +186,9 @@ def load_api_results_csv(filename, normalize_paths=True, filename_replacements={
183
186
 
184
187
 
185
188
  def write_api_results_csv(detection_results, filename):
186
- """
189
+ """
187
190
  [DEPRECATED]
188
-
191
+
189
192
  Writes a Pandas table to csv in a way that's compatible with the .csv output
190
193
  format. Currently just a wrapper around to_csv that forces output writing
191
194
  to go through a common code path.
@@ -3,7 +3,7 @@
3
3
  md_to_coco.py
4
4
 
5
5
  "Converts" MegaDetector output files to COCO format. "Converts" is in quotes because
6
- this is an opinionated transformation that requires a confidence threshold for most
6
+ this is an opinionated transformation that requires a confidence threshold for most
7
7
  applications.
8
8
 
9
9
  Does not currently handle classification information.
@@ -15,6 +15,8 @@ Does not currently handle classification information.
15
15
  import os
16
16
  import json
17
17
  import uuid
18
+ import sys
19
+ import argparse
18
20
 
19
21
  from tqdm import tqdm
20
22
 
@@ -36,40 +38,41 @@ def md_to_coco(md_results_file,
36
38
  include_failed_images=True,
37
39
  include_annotations_without_bounding_boxes=True,
38
40
  empty_category_id='0',
39
- overwrite_behavior='skip',
41
+ overwrite_behavior='skip',
40
42
  verbose=True,
41
- image_filename_to_size=None):
43
+ image_filename_to_size=None,
44
+ unrecognized_category_handling='error'):
42
45
  """
43
46
  "Converts" MegaDetector output files to COCO format. "Converts" is in quotes because
44
47
  this is an opinionated transformation that typically requires a confidence threshold.
45
-
46
- The default confidence threshold is not 0; the assumption is that by default, you are
48
+
49
+ The default confidence threshold is not 0; the assumption is that by default, you are
47
50
  going to treat the resulting COCO file as a set of labels. If you are using the resulting COCO
48
- file to *evaluate* a detector, rather than as a set of labels, you likely want a
49
- confidence threshold of 0. Confidence values will be written to the semi-standard "score"
51
+ file to *evaluate* a detector, rather than as a set of labels, you likely want a
52
+ confidence threshold of 0. Confidence values will be written to the semi-standard "score"
50
53
  field for each image (regardless of the threshold) if preserve_nonstandard_metadata is True.
51
-
52
- A folder of images is required if width and height information are not available
54
+
55
+ A folder of images is required if width and height information are not available
53
56
  in the MD results file.
54
57
 
55
58
  Args:
56
- md_results_file (str): MD results .json file to convert to COCO
59
+ md_results_file (str): MD results .json file to convert to COCO
57
60
  format
58
- coco_output_file (str, optional): COCO .json file to write; if this is None, we'll return
61
+ coco_output_file (str, optional): COCO .json file to write; if this is None, we'll return
59
62
  a COCO-formatted dict, but won't write it to disk. If this is 'auto', we'll write to
60
63
  [md_results_file_without_extension].coco.json.
61
64
  image_folder (str, optional): folder of images, required if 'width' and 'height' are not
62
65
  present in the MD results file (they are not required by the format)
63
66
  confidence_threshold (float, optional): boxes below this confidence threshold will not be
64
67
  included in the output data
65
- validate_image_sizes (bool, optional): if this is True, we'll check the image sizes
68
+ validate_image_sizes (bool, optional): if this is True, we'll check the image sizes
66
69
  regardless of whether "width" and "height" are present in the MD results file.
67
70
  info (dict, optional): arbitrary metadata to include in an "info" field in the COCO-formatted
68
71
  output
69
- preserve_nonstandard_metadata (bool, optional): if this is True, confidence will be preserved in a
70
- non-standard "score" field in each annotation, and any random fields present in each image's
71
- data (e.g. EXIF metadata) will be propagated to COCO output
72
- include_failed_images (bool, optional): if this is True, failed images will be propagated to COCO output
72
+ preserve_nonstandard_metadata (bool, optional): if this is True, confidence will be preserved in a
73
+ non-standard "score" field in each annotation, and any random fields present in each image's
74
+ data (e.g. EXIF metadata) will be propagated to COCO output
75
+ include_failed_images (bool, optional): if this is True, failed images will be propagated to COCO output
73
76
  with a non-empty "failure" field and no other fields, otherwise failed images will be skipped.
74
77
  include_annotations_without_bounding_boxes (bool, optional): if this is True, annotations with
75
78
  only class labels (no bounding boxes) will be included in the output. If this is False, empty
@@ -84,19 +87,26 @@ def md_to_coco(md_results_file,
84
87
  image sizes is the slowest step, so if you need to convert many results files at once for the same
85
88
  set of images, things will be gobs faster if you read the image sizes in advance and pass them in
86
89
  via this argument. The format used here is the same format output by parallel_get_image_sizes().
87
-
90
+ unrecognized_category_handling (str or float, optional): specifies what to do when encountering category
91
+ IDs not in the category mapping. Can be "error", "ignore", or "warning". Can also be a float,
92
+ in which case an error is thrown if an unrecognized category has a confidence value higher than
93
+ this value.
94
+
88
95
  Returns:
89
96
  dict: the COCO data dict, identical to what's written to [coco_output_file] if [coco_output_file]
90
97
  is not None.
91
98
  """
92
-
99
+
93
100
  assert isinstance(md_results_file,str)
94
101
  assert os.path.isfile(md_results_file), \
95
102
  'MD results file {} does not exist'.format(md_results_file)
103
+ assert (isinstance(unrecognized_category_handling,float)) or \
104
+ (unrecognized_category_handling in ('error','warning','ignore')), \
105
+ 'Invalid category handling behavior {}'.format(unrecognized_category_handling)
96
106
 
97
107
  if coco_output_file == 'auto':
98
108
  coco_output_file = insert_before_extension(md_results_file,'coco')
99
-
109
+
100
110
  if coco_output_file is not None:
101
111
  if os.path.isfile(coco_output_file):
102
112
  if overwrite_behavior == 'skip':
@@ -120,155 +130,177 @@ def md_to_coco(md_results_file,
120
130
  pass
121
131
  elif overwrite_behavior == 'error':
122
132
  raise ValueError('Output file {} exists'.format(coco_output_file))
123
-
124
- with open(md_results_file,'r') as f:
125
- md_results = json.load(f)
126
-
133
+
134
+ with open(md_results_file,'r') as f:
135
+ md_results = json.load(f)
136
+
127
137
  coco_images = []
128
138
  coco_annotations = []
129
-
130
- print('Converting MD results file {} to COCO file {}...'.format(
131
- md_results_file, coco_output_file))
132
-
139
+
140
+ if verbose:
141
+ print('Converting MD results file {} to COCO file {}...'.format(
142
+ md_results_file, coco_output_file))
143
+
133
144
  # im = md_results['images'][0]
134
145
  for im in tqdm(md_results['images'],disable=(not verbose)):
135
-
146
+
136
147
  coco_im = {}
137
148
  coco_im['id'] = im['file']
138
149
  coco_im['file_name'] = im['file']
139
-
140
- # There is no concept of this in the COCO standard
150
+
151
+ # There is no concept of this in the COCO standard
141
152
  if 'failure' in im and im['failure'] is not None:
142
153
  if include_failed_images:
143
154
  coco_im['failure'] = im['failure']
144
155
  coco_images.append(coco_im)
145
156
  continue
146
-
157
+
147
158
  # Read/validate image size
148
159
  w = None
149
160
  h = None
150
-
161
+
151
162
  if ('width' not in im) or ('height' not in im) or validate_image_sizes:
152
163
  if (image_folder is None) and (image_filename_to_size is None):
153
- raise ValueError('Must provide an image folder or a size mapping when height/width need to be read from images')
154
-
164
+ raise ValueError('Must provide an image folder or a size mapping when ' + \
165
+ 'height/width need to be read from images')
166
+
155
167
  w = None; h = None
156
-
168
+
157
169
  if image_filename_to_size is not None:
158
-
170
+
159
171
  if im['file'] not in image_filename_to_size:
160
- print('Warning: file {} not in image size mapping dict, reading from file'.format(im['file']))
172
+ print('Warning: file {} not in image size mapping dict, reading from file'.format(
173
+ im['file']))
161
174
  else:
162
175
  image_size = image_filename_to_size[im['file']]
163
176
  if image_size is not None:
164
177
  assert len(image_size) == 2
165
178
  w = image_size[0]
166
179
  h = image_size[1]
167
-
180
+
168
181
  if w is None:
169
-
182
+
170
183
  image_file_abs = os.path.join(image_folder,im['file'])
171
184
  pil_im = vis_utils.open_image(image_file_abs)
172
185
  w = pil_im.width
173
186
  h = pil_im.height
174
-
187
+
175
188
  if validate_image_sizes:
176
189
  if 'width' in im:
177
190
  assert im['width'] == w, 'Width mismatch for image {}'.format(im['file'])
178
191
  if 'height' in im:
179
192
  assert im['height'] == h, 'Height mismatch for image {}'.format(im['file'])
180
193
  else:
181
-
194
+
182
195
  w = im['width']
183
196
  h = im['height']
184
-
197
+
185
198
  coco_im['width'] = w
186
199
  coco_im['height'] = h
187
-
200
+
188
201
  # Add other, non-standard fields to the output dict
189
202
  if preserve_nonstandard_metadata:
190
203
  for k in im.keys():
191
204
  if k not in ('file','detections','width','height'):
192
205
  coco_im[k] = im[k]
193
-
206
+
194
207
  coco_images.append(coco_im)
195
-
208
+
196
209
  # detection = im['detections'][0]
197
210
  for detection in im['detections']:
198
-
211
+
199
212
  # Skip below-threshold detections
200
213
  if confidence_threshold is not None and detection['conf'] < confidence_threshold:
201
214
  continue
202
-
215
+
203
216
  # Create an annotation
204
- ann = {}
217
+ ann = {}
205
218
  ann['id'] = str(uuid.uuid1())
206
- ann['image_id'] = coco_im['id']
207
-
219
+ ann['image_id'] = coco_im['id']
220
+
208
221
  md_category_id = detection['category']
222
+
223
+ if md_category_id not in md_results['detection_categories']:
224
+
225
+ s = 'unrecognized category ID {} occurred with confidence {} in file {}'.format(
226
+ md_category_id,detection['conf'],im['file'])
227
+ if isinstance(unrecognized_category_handling,float):
228
+ if detection['conf'] > unrecognized_category_handling:
229
+ raise ValueError(s)
230
+ else:
231
+ continue
232
+ elif unrecognized_category_handling == 'warning':
233
+ print('Warning: {}'.format(s))
234
+ continue
235
+ elif unrecognized_category_handling == 'ignore':
236
+ continue
237
+ else:
238
+ raise ValueError(s)
239
+
209
240
  coco_category_id = int(md_category_id)
210
241
  ann['category_id'] = coco_category_id
211
-
242
+
212
243
  if md_category_id != empty_category_id:
213
-
244
+
214
245
  assert 'bbox' in detection,\
215
246
  'Oops: non-empty category with no bbox in {}'.format(im['file'])
216
-
247
+
217
248
  ann['bbox'] = detection['bbox']
218
-
249
+
219
250
  # MegaDetector: [x,y,width,height] (normalized, origin upper-left)
220
251
  # COCO: [x,y,width,height] (absolute, origin upper-left)
221
252
  ann['bbox'][0] = ann['bbox'][0] * coco_im['width']
222
253
  ann['bbox'][1] = ann['bbox'][1] * coco_im['height']
223
254
  ann['bbox'][2] = ann['bbox'][2] * coco_im['width']
224
- ann['bbox'][3] = ann['bbox'][3] * coco_im['height']
225
-
255
+ ann['bbox'][3] = ann['bbox'][3] * coco_im['height']
256
+
226
257
  else:
227
-
258
+
228
259
  # In very esoteric cases, we use the empty category (0) in MD-formatted output files
229
260
  print('Warning: empty category ({}) used for annotation in file {}'.format(
230
261
  empty_category_id,im['file']))
231
262
  pass
232
-
263
+
233
264
  if preserve_nonstandard_metadata:
234
265
  # "Score" is a semi-standard string here, recognized by at least pycocotools
235
266
  # ann['conf'] = detection['conf']
236
267
  ann['score'] = detection['conf']
237
-
268
+
238
269
  if 'bbox' in ann or include_annotations_without_bounding_boxes:
239
- coco_annotations.append(ann)
240
-
270
+ coco_annotations.append(ann)
271
+
241
272
  # ...for each detection
242
-
273
+
243
274
  # ...for each image
244
275
 
245
276
  output_dict = {}
246
-
277
+
247
278
  if info is not None:
248
279
  output_dict['info'] = info
249
280
  else:
250
281
  output_dict['info'] = {'description':'Converted from MD results file {}'.format(md_results_file)}
251
282
  output_dict['info']['confidence_threshold'] = confidence_threshold
252
-
283
+
253
284
  output_dict['images'] = coco_images
254
285
  output_dict['annotations'] = coco_annotations
255
-
286
+
256
287
  output_dict['categories'] = []
257
-
288
+
258
289
  for md_category_id in md_results['detection_categories'].keys():
259
-
290
+
260
291
  coco_category_id = int(md_category_id)
261
292
  coco_category = {'id':coco_category_id,
262
293
  'name':md_results['detection_categories'][md_category_id]}
263
294
  output_dict['categories'].append(coco_category)
264
-
265
- print('Writing COCO output file...')
266
-
295
+
296
+ if verbose:
297
+ print('Writing COCO output file...')
298
+
267
299
  if coco_output_file is not None:
268
300
  with open(coco_output_file,'w') as f:
269
301
  json.dump(output_dict,f,indent=1)
270
-
271
- return output_dict
302
+
303
+ return output_dict
272
304
 
273
305
  # ...def md_to_coco(...)
274
306
 
@@ -276,11 +308,11 @@ def md_to_coco(md_results_file,
276
308
  #%% Interactive driver
277
309
 
278
310
  if False:
279
-
311
+
280
312
  pass
281
313
 
282
314
  #%% Configure options
283
-
315
+
284
316
  md_results_file = os.path.expanduser('~/data/md-test.json')
285
317
  coco_output_file = os.path.expanduser('~/data/md-test-coco.json')
286
318
  image_folder = os.path.expanduser('~/data/md-test')
@@ -290,10 +322,10 @@ if False:
290
322
  info=None
291
323
  preserve_nonstandard_metadata=True
292
324
  include_failed_images=False
293
-
294
-
325
+
326
+
295
327
  #%% Programmatic execution
296
-
328
+
297
329
  output_dict = md_to_coco(md_results_file,
298
330
  coco_output_file=coco_output_file,
299
331
  image_folder=image_folder,
@@ -302,10 +334,10 @@ if False:
302
334
  info=info,
303
335
  preserve_nonstandard_metadata=preserve_nonstandard_metadata,
304
336
  include_failed_images=include_failed_images)
305
-
306
-
337
+
338
+
307
339
  #%% Command-line example
308
-
340
+
309
341
  s = f'python md_to_coco.py {md_results_file} {coco_output_file} {confidence_threshold} '
310
342
  if image_folder is not None:
311
343
  s += f' --image_folder {image_folder}'
@@ -316,9 +348,9 @@ if False:
316
348
 
317
349
  print(s); import clipboard; clipboard.copy(s)
318
350
 
319
-
351
+
320
352
  #%% Preview the resulting file
321
-
353
+
322
354
  from megadetector.visualization import visualize_db
323
355
  options = visualize_db.DbVizOptions()
324
356
  options.parallelize_rendering = True
@@ -327,49 +359,48 @@ if False:
327
359
 
328
360
  html_file,_ = visualize_db.visualize_db(coco_output_file,
329
361
  os.path.expanduser('~/tmp/md_to_coco_preview'),
330
- image_folder,options)
362
+ image_folder,options)
331
363
 
332
364
  from megadetector.utils import path_utils # noqa
333
365
  path_utils.open_file(html_file)
334
-
335
-
336
- #%% Command-line driver
337
366
 
338
- import sys,argparse
339
367
 
340
- def main():
368
+ #%% Command-line driver
369
+
370
+ def main(): # noqa
341
371
 
342
372
  parser = argparse.ArgumentParser(
343
- description='"Convert" MD output to COCO format, in quotes because this is an opinionated transformation that requires a confidence threshold')
344
-
373
+ description='"Convert" MD output to COCO format, in quotes because this is an opinionated ' + \
374
+ 'transformation that requires a confidence threshold')
375
+
345
376
  parser.add_argument(
346
377
  'md_results_file',
347
378
  type=str,
348
379
  help='Path to MD results file (.json)')
349
-
380
+
350
381
  parser.add_argument(
351
382
  'coco_output_file',
352
383
  type=str,
353
384
  help='Output filename (.json)')
354
-
385
+
355
386
  parser.add_argument(
356
387
  'confidence_threshold',
357
388
  type=float,
358
389
  default=default_confidence_threshold,
359
390
  help='Confidence threshold (default {})'.format(default_confidence_threshold)
360
391
  )
361
-
392
+
362
393
  parser.add_argument(
363
394
  '--image_folder',
364
395
  type=str,
365
396
  default=None,
366
397
  help='Image folder, only required if we will need to access image sizes'
367
398
  )
368
-
399
+
369
400
  parser.add_argument(
370
401
  '--preserve_nonstandard_metadata',
371
402
  action='store_true',
372
- help='Preserve metadata that isn\'t normally included in ' +
403
+ help='Preserve metadata that isn\'t normally included in ' +
373
404
  'COCO-formatted data (e.g. EXIF metadata, confidence values)'
374
405
  )
375
406
 
@@ -378,7 +409,7 @@ def main():
378
409
  action='store_true',
379
410
  help='Keep a record of corrupted images in the output; may not be completely COCO-compliant'
380
411
  )
381
-
412
+
382
413
  if len(sys.argv[1:]) == 0:
383
414
  parser.print_help()
384
415
  parser.exit()
@@ -392,7 +423,7 @@ def main():
392
423
  validate_image_sizes=False,
393
424
  info=None,
394
425
  preserve_nonstandard_metadata=args.preserve_nonstandard_metadata,
395
- include_failed_images=args.include_failed_images)
396
-
426
+ include_failed_images=args.include_failed_images)
427
+
397
428
  if __name__ == '__main__':
398
429
  main()