megadetector 5.0.28__py3-none-any.whl → 5.0.29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (176)
  1. megadetector/api/batch_processing/api_core/batch_service/score.py +4 -5
  2. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +1 -1
  3. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +1 -1
  4. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
  5. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
  6. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
  7. megadetector/api/synchronous/api_core/tests/load_test.py +2 -3
  8. megadetector/classification/aggregate_classifier_probs.py +3 -3
  9. megadetector/classification/analyze_failed_images.py +5 -5
  10. megadetector/classification/cache_batchapi_outputs.py +5 -5
  11. megadetector/classification/create_classification_dataset.py +11 -12
  12. megadetector/classification/crop_detections.py +10 -10
  13. megadetector/classification/csv_to_json.py +8 -8
  14. megadetector/classification/detect_and_crop.py +13 -15
  15. megadetector/classification/evaluate_model.py +7 -7
  16. megadetector/classification/identify_mislabeled_candidates.py +6 -6
  17. megadetector/classification/json_to_azcopy_list.py +1 -1
  18. megadetector/classification/json_validator.py +29 -32
  19. megadetector/classification/map_classification_categories.py +9 -9
  20. megadetector/classification/merge_classification_detection_output.py +10 -9
  21. megadetector/classification/prepare_classification_script.py +19 -19
  22. megadetector/classification/prepare_classification_script_mc.py +23 -23
  23. megadetector/classification/run_classifier.py +4 -4
  24. megadetector/classification/save_mislabeled.py +6 -6
  25. megadetector/classification/train_classifier.py +1 -1
  26. megadetector/classification/train_classifier_tf.py +9 -9
  27. megadetector/classification/train_utils.py +10 -10
  28. megadetector/data_management/annotations/annotation_constants.py +1 -1
  29. megadetector/data_management/camtrap_dp_to_coco.py +45 -45
  30. megadetector/data_management/cct_json_utils.py +101 -101
  31. megadetector/data_management/cct_to_md.py +49 -49
  32. megadetector/data_management/cct_to_wi.py +33 -33
  33. megadetector/data_management/coco_to_labelme.py +75 -75
  34. megadetector/data_management/coco_to_yolo.py +189 -189
  35. megadetector/data_management/databases/add_width_and_height_to_db.py +3 -2
  36. megadetector/data_management/databases/combine_coco_camera_traps_files.py +38 -38
  37. megadetector/data_management/databases/integrity_check_json_db.py +202 -188
  38. megadetector/data_management/databases/subset_json_db.py +33 -33
  39. megadetector/data_management/generate_crops_from_cct.py +38 -38
  40. megadetector/data_management/get_image_sizes.py +54 -49
  41. megadetector/data_management/labelme_to_coco.py +130 -124
  42. megadetector/data_management/labelme_to_yolo.py +78 -72
  43. megadetector/data_management/lila/create_lila_blank_set.py +81 -83
  44. megadetector/data_management/lila/create_lila_test_set.py +32 -31
  45. megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
  46. megadetector/data_management/lila/download_lila_subset.py +21 -24
  47. megadetector/data_management/lila/generate_lila_per_image_labels.py +91 -91
  48. megadetector/data_management/lila/get_lila_annotation_counts.py +30 -30
  49. megadetector/data_management/lila/get_lila_image_counts.py +22 -22
  50. megadetector/data_management/lila/lila_common.py +70 -70
  51. megadetector/data_management/lila/test_lila_metadata_urls.py +13 -14
  52. megadetector/data_management/mewc_to_md.py +339 -340
  53. megadetector/data_management/ocr_tools.py +258 -252
  54. megadetector/data_management/read_exif.py +231 -224
  55. megadetector/data_management/remap_coco_categories.py +26 -26
  56. megadetector/data_management/remove_exif.py +31 -20
  57. megadetector/data_management/rename_images.py +187 -187
  58. megadetector/data_management/resize_coco_dataset.py +41 -41
  59. megadetector/data_management/speciesnet_to_md.py +41 -41
  60. megadetector/data_management/wi_download_csv_to_coco.py +55 -55
  61. megadetector/data_management/yolo_output_to_md_output.py +117 -120
  62. megadetector/data_management/yolo_to_coco.py +195 -188
  63. megadetector/detection/change_detection.py +831 -0
  64. megadetector/detection/process_video.py +340 -337
  65. megadetector/detection/pytorch_detector.py +304 -262
  66. megadetector/detection/run_detector.py +177 -164
  67. megadetector/detection/run_detector_batch.py +364 -363
  68. megadetector/detection/run_inference_with_yolov5_val.py +328 -325
  69. megadetector/detection/run_tiled_inference.py +256 -249
  70. megadetector/detection/tf_detector.py +24 -24
  71. megadetector/detection/video_utils.py +290 -282
  72. megadetector/postprocessing/add_max_conf.py +15 -11
  73. megadetector/postprocessing/categorize_detections_by_size.py +44 -44
  74. megadetector/postprocessing/classification_postprocessing.py +415 -415
  75. megadetector/postprocessing/combine_batch_outputs.py +20 -21
  76. megadetector/postprocessing/compare_batch_results.py +528 -517
  77. megadetector/postprocessing/convert_output_format.py +97 -97
  78. megadetector/postprocessing/create_crop_folder.py +219 -146
  79. megadetector/postprocessing/detector_calibration.py +173 -168
  80. megadetector/postprocessing/generate_csv_report.py +508 -499
  81. megadetector/postprocessing/load_api_results.py +23 -20
  82. megadetector/postprocessing/md_to_coco.py +129 -98
  83. megadetector/postprocessing/md_to_labelme.py +89 -83
  84. megadetector/postprocessing/md_to_wi.py +40 -40
  85. megadetector/postprocessing/merge_detections.py +87 -114
  86. megadetector/postprocessing/postprocess_batch_results.py +313 -298
  87. megadetector/postprocessing/remap_detection_categories.py +36 -36
  88. megadetector/postprocessing/render_detection_confusion_matrix.py +205 -199
  89. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
  90. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
  91. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +702 -677
  92. megadetector/postprocessing/separate_detections_into_folders.py +226 -211
  93. megadetector/postprocessing/subset_json_detector_output.py +265 -262
  94. megadetector/postprocessing/top_folders_to_bottom.py +45 -45
  95. megadetector/postprocessing/validate_batch_results.py +70 -70
  96. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
  97. megadetector/taxonomy_mapping/map_new_lila_datasets.py +15 -15
  98. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +14 -14
  99. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +66 -66
  100. megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
  101. megadetector/taxonomy_mapping/simple_image_download.py +8 -8
  102. megadetector/taxonomy_mapping/species_lookup.py +33 -33
  103. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
  104. megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
  105. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
  106. megadetector/utils/azure_utils.py +22 -22
  107. megadetector/utils/ct_utils.py +1018 -200
  108. megadetector/utils/directory_listing.py +21 -77
  109. megadetector/utils/gpu_test.py +22 -22
  110. megadetector/utils/md_tests.py +541 -518
  111. megadetector/utils/path_utils.py +1457 -398
  112. megadetector/utils/process_utils.py +41 -41
  113. megadetector/utils/sas_blob_utils.py +53 -49
  114. megadetector/utils/split_locations_into_train_val.py +61 -61
  115. megadetector/utils/string_utils.py +147 -26
  116. megadetector/utils/url_utils.py +463 -173
  117. megadetector/utils/wi_utils.py +2629 -2526
  118. megadetector/utils/write_html_image_list.py +137 -137
  119. megadetector/visualization/plot_utils.py +21 -21
  120. megadetector/visualization/render_images_with_thumbnails.py +37 -73
  121. megadetector/visualization/visualization_utils.py +401 -397
  122. megadetector/visualization/visualize_db.py +197 -190
  123. megadetector/visualization/visualize_detector_output.py +79 -73
  124. {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/METADATA +135 -132
  125. megadetector-5.0.29.dist-info/RECORD +163 -0
  126. {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/WHEEL +1 -1
  127. {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/licenses/LICENSE +0 -0
  128. {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/top_level.txt +0 -0
  129. megadetector/data_management/importers/add_nacti_sizes.py +0 -52
  130. megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
  131. megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
  132. megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
  133. megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
  134. megadetector/data_management/importers/awc_to_json.py +0 -191
  135. megadetector/data_management/importers/bellevue_to_json.py +0 -272
  136. megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
  137. megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
  138. megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
  139. megadetector/data_management/importers/cct_field_adjustments.py +0 -58
  140. megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
  141. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  142. megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
  143. megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
  144. megadetector/data_management/importers/ena24_to_json.py +0 -276
  145. megadetector/data_management/importers/filenames_to_json.py +0 -386
  146. megadetector/data_management/importers/helena_to_cct.py +0 -283
  147. megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
  148. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  149. megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
  150. megadetector/data_management/importers/jb_csv_to_json.py +0 -150
  151. megadetector/data_management/importers/mcgill_to_json.py +0 -250
  152. megadetector/data_management/importers/missouri_to_json.py +0 -490
  153. megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
  154. megadetector/data_management/importers/noaa_seals_2019.py +0 -181
  155. megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
  156. megadetector/data_management/importers/pc_to_json.py +0 -365
  157. megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
  158. megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
  159. megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
  160. megadetector/data_management/importers/rspb_to_json.py +0 -356
  161. megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
  162. megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
  163. megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
  164. megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
  165. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  166. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  167. megadetector/data_management/importers/sulross_get_exif.py +0 -65
  168. megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
  169. megadetector/data_management/importers/ubc_to_json.py +0 -399
  170. megadetector/data_management/importers/umn_to_json.py +0 -507
  171. megadetector/data_management/importers/wellington_to_json.py +0 -263
  172. megadetector/data_management/importers/wi_to_json.py +0 -442
  173. megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
  174. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
  175. megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
  176. megadetector-5.0.28.dist-info/RECORD +0 -209
megadetector/classification/identify_mislabeled_candidates.py

@@ -5,7 +5,7 @@ identify_mislabeled_candidates.py
 Identify images that may have been mislabeled.
 
 A "mislabeled candidate" is defined as an image meeting both criteria:
-
+
 * according to the ground-truth label, the model made an incorrect prediction
 
 * the model's prediction confidence exceeds its confidence for the ground-truth
@@ -13,12 +13,12 @@ A "mislabeled candidate" is defined as an image meeting both criteria:
 
 This script outputs for each dataset a text file containing the filenames of
 mislabeled candidates, one per line. The text files are saved to:
-
+
     <logdir>/mislabeled_candidates_{split}_{dataset}.txt
 
 This list of files can then be passed to AzCopy to be downloaded:
 
-""
+""
     azcopy cp "http://<url_of_container>?<sas_token>" "/save/files/here" \
         --list-of-files "/path/to/mislabeled_candidates_{split}_{dataset}.txt"
 ""
@@ -68,7 +68,7 @@ from tqdm import tqdm
 
 def main(logdir: str, splits: Iterable[str], margin: float,
          include_dataset_in_filename: bool) -> None:
-
+
     # load files
     logdir = os.path.normpath(logdir)  # removes any trailing slash
     base_logdir = os.path.dirname(logdir)
@@ -112,7 +112,7 @@ def get_candidates_df(outputs_csv_path: str, label_names: Sequence[str],
     Returns a DataFrame containing crops only from mislabeled candidate
     images.
     """
-
+
     df = pd.read_csv(outputs_csv_path, float_precision='high')
     all_rows = range(len(df))
     df['pred'] = df[label_names].idxmax(axis=1)
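
For orientation, the two criteria in the docstring amount to a simple filter over this DataFrame. A minimal sketch, assuming the outputs CSV has a ground-truth 'label' column plus one confidence column per class name; the helper name is hypothetical, and applying margin as the required confidence gap is an assumption (the real get_candidates_df differs in details such as crop handling):

    import numpy as np
    import pandas as pd

    def candidate_mask(df: pd.DataFrame, label_names: list, margin: float) -> pd.Series:
        conf = df[label_names]
        pred = conf.idxmax(axis=1)  # model's top class per row
        # confidence assigned to the ground-truth class, row by row
        # (assumes every ground-truth label appears in label_names)
        col_idx = df['label'].map({n: i for i, n in enumerate(label_names)}).to_numpy()
        true_conf = conf.to_numpy()[np.arange(len(df)), col_idx]
        wrong = pred != df['label']  # criterion 1: incorrect prediction
        confident = (conf.max(axis=1) - true_conf) > margin  # criterion 2: by a margin
        return wrong & confident  # True for mislabeled candidates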
@@ -146,7 +146,7 @@ def _parse_args() -> argparse.Namespace:
 
 
 if __name__ == '__main__':
-
+
     args = _parse_args()
     main(logdir=args.logdir, splits=args.splits, margin=args.margin,
          include_dataset_in_filename=args.include_dataset_in_filename)
megadetector/classification/json_to_azcopy_list.py

@@ -9,7 +9,7 @@ See: https://github.com/Azure/azure-storage-azcopy/wiki/Listing-specific-files-t
 
 """
 
-#%% Imports and constants
+#%% Imports and constants
 
 import json
 import os
megadetector/classification/json_validator.py

@@ -10,12 +10,12 @@ See README.md for an example of a classification label specification JSON file.
 The validation step takes the classification label specification JSON file and
 finds the dataset labels that belong to each classification label. It checks
 that the following conditions hold:
-
+
 1) Each classification label specification matches at least 1 dataset label.
 
 2) If the classification label includes a taxonomical specification, then the
    taxon is actually part of our master taxonomy.
-
+
 3) If the 'prioritize' key is found for a given label, then the label must
    also have a 'max_count' key.
 
@@ -44,7 +44,7 @@ exist in Azure Blob Storage. In total, we output the following files:
 
 - queried_images.json
     main output file, ex:
-
+
     {
        "caltech/cct_images/59f5fe2b-23d2-11e8-a6a3-ec086b02610b.jpg": {
            "dataset": "caltech",
@@ -86,6 +86,7 @@ from typing import Any
 
 from megadetector.utils import path_utils
 from megadetector.utils import sas_blob_utils
+from megadetector.utils import ct_utils
 
 from megadetector.data_management.megadb import megadb_utils
 from megadetector.taxonomy_mapping.taxonomy_graph import (
@@ -113,7 +114,7 @@ def main(label_spec_json_path: str,
          json_indent: int | None = None,
          seed: int = 123,
          mislabeled_images_dir: str | None = None) -> None:
-
+
     # input validation
     assert os.path.exists(label_spec_json_path)
     assert os.path.exists(taxonomy_csv_path)
@@ -166,36 +167,32 @@ def main(label_spec_json_path: str,
     date = datetime.now().strftime('%Y%m%d_%H%M%S')  # ex: '20200722_110816'
     log_path = os.path.join(output_dir, f'json_validator_log_{date}.json')
     print(f'Saving log of bad images to {log_path}')
-    with open(log_path, 'w') as f:
-        json.dump(log, f, indent=1)
+    ct_utils.write_json(log_path, log)
 
     # save label counts, pre-subsampling
     print('Saving pre-sampling label counts')
     save_path = os.path.join(output_dir, 'image_counts_by_label_presample.json')
-    with open(save_path, 'w') as f:
-        image_counts_by_label = {
-            label: len(filter_images(output_js, label))
-            for label in sorted(input_js.keys())
-        }
-        json.dump(image_counts_by_label, f, indent=1)
+    image_counts_by_label_presample = {
+        label: len(filter_images(output_js, label))
+        for label in sorted(input_js.keys())
+    }
+    ct_utils.write_json(save_path, image_counts_by_label_presample)
 
     print('Sampling with priority (if needed)')
     output_js = sample_with_priority(input_js, output_js)
 
     print('Saving queried_images.json')
     output_json_path = os.path.join(output_dir, 'queried_images.json')
-    with open(output_json_path, 'w') as f:
-        json.dump(output_js, f, indent=json_indent)
+    ct_utils.write_json(output_json_path, output_js, indent=json_indent)
 
     # save label counts, post-subsampling
     print('Saving post-sampling label counts')
     save_path = os.path.join(output_dir, 'image_counts_by_label_sampled.json')
-    with open(save_path, 'w') as f:
-        image_counts_by_label = {
-            label: len(filter_images(output_js, label))
-            for label in sorted(input_js.keys())
-        }
-        json.dump(image_counts_by_label, f, indent=1)
+    image_counts_by_label_sampled = {
+        label: len(filter_images(output_js, label))
+        for label in sorted(input_js.keys())
+    }
+    ct_utils.write_json(save_path, image_counts_by_label_sampled)
 
 
 #%% Support functions
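
The recurring change in this release, here and in several files below, replaces inline open()/json.dump() pairs with ct_utils.write_json. Judging from the call sites, the helper is roughly equivalent to this sketch; the actual implementation in ct_utils may differ, e.g. in encoding or error handling:

    import json

    def write_json(path, content, indent=1):
        # centralizes the open/dump pattern that the hunks above remove
        with open(path, 'w') as f:
            json.dump(content, f, indent=indent)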
@@ -215,7 +212,7 @@ def parse_spec(spec_dict: Mapping[str, Any],
 
     Raises: ValueError, if specification does not match any dataset labels
     """
-
+
     results = set()
     if 'taxa' in spec_dict:
         # spec_dict['taxa']: list of dict
@@ -262,7 +259,7 @@ def validate_json(input_js: dict[str, dict[str, Any]],
         dataset labels, or if allow_multilabel=False but a dataset label is
         included in two or more classification labels
     """
-
+
     # maps output label name to set of (dataset, dataset_label) tuples
     label_to_inclusions: dict[str, set[tuple[str, str]]] = {}
     for label, spec_dict in input_js.items():
@@ -301,7 +298,7 @@ def get_output_json(label_to_inclusions: dict[str, set[tuple[str, str]]],
     - 'label': list of str, assigned output label
     - 'bbox': list of dicts, optional
     """
-
+
     # Because MegaDB is organized by dataset, we do the same...
     #
     # ds_to_labels = {
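
Combining the docstring fields above with the queried_images.json excerpt earlier in this file, a single output entry looks roughly like the following; the 'dataset' value comes from the docstring example, while the 'label' and 'bbox' values (and the exact layout of each bbox dict) are hypothetical illustrations:

    {
        "caltech/cct_images/59f5fe2b-23d2-11e8-a6a3-ec086b02610b.jpg": {
            "dataset": "caltech",
            "label": ["deer"],  # list of str, assigned output label
            "bbox": [{"category": "animal",  # optional
                      "bbox": [0.35, 0.43, 0.06, 0.09]}]
        }
    }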
@@ -360,9 +357,9 @@ ...
     '''
 
     output_json = {}  # maps full image path to json object
-
+
     for ds in tqdm(sorted(ds_to_labels.keys())):  # sort for determinism
-
+
         mislabeled_images: Mapping[str, Any] = {}
         if mislabeled_images_dir is not None:
             csv_path = os.path.join(mislabeled_images_dir, f'{ds}.csv')
@@ -428,7 +425,7 @@ def get_image_sas_uris(img_paths: Iterable[str]) -> list[str]:
     image_sas_uris: list of str, image blob URIs with SAS tokens, ready to
         pass to the batch detection API
     """
-
+
     # we need the datasets table for getting SAS keys
     datasets_table = megadb_utils.MegadbUtils().get_datasets_table()
 
@@ -483,7 +480,7 @@ def remove_nonexistent_images(js: MutableMapping[str, dict[str, Any]],
     check_local: optional str, path to local dir
     num_threads: int, number of threads to use for checking blob existence
     """
-
+
     def check_local_then_azure(local_path: str, blob_url: str) -> bool:
         return (os.path.exists(local_path)
                 or sas_blob_utils.check_blob_exists(blob_url))
@@ -538,7 +535,7 @@ def remove_images_insufficient_locs(js: MutableMapping[str, dict[str, Any]],
     min_locs: optional int, minimum # of locations that each label must
         have in order to be included
     """
-
+
     # 1st pass: populate label_to_locs
     # label (tuple of str) => set of (dataset, location)
     label_to_locs = defaultdict(set)
@@ -574,7 +571,7 @@ def filter_images(output_js: Mapping[str, Mapping[str, Any]], label: str,
 
     Returns: set of str, image files that match the filtering criteria
     """
-
+
     img_files: set[str] = set()
     for img_file, img_dict in output_js.items():
         cond1 = (label in img_dict['label'])
@@ -594,7 +591,7 @@ def sample_with_priority(input_js: Mapping[str, Mapping[str, Any]],
 
     Returns: dict, keys are image file names, sorted alphabetically
     """
-
+
     filtered_imgs: set[str] = set()
     for label, spec_dict in input_js.items():
         if 'prioritize' in spec_dict and 'max_count' not in spec_dict:
@@ -635,7 +632,7 @@ def sample_with_priority(input_js: Mapping[str, Mapping[str, Any]],
 #%% Command-line driver
 
 def _parse_args() -> argparse.Namespace:
-
+
     parser = argparse.ArgumentParser(
         formatter_class=argparse.ArgumentDefaultsHelpFormatter,
         description='Validates JSON.')
@@ -685,7 +682,7 @@ def _parse_args() -> argparse.Namespace:
 
 
 if __name__ == '__main__':
-
+
     args = _parse_args()
     main(label_spec_json_path=args.label_spec_json,
          taxonomy_csv_path=args.taxonomy_csv,
megadetector/classification/map_classification_categories.py

@@ -12,11 +12,11 @@ Takes as input 2 label specification JSON files:
 
 1) desired label specification JSON file
    this should not have a target named "other"
-
+
 2) label specification JSON file of trained classifier
 
 The mapping is accomplished as follows:
-
+
 1. For each category in the classifier label spec, find all taxon nodes that
    belong to that category.
 
@@ -54,6 +54,7 @@ from tqdm import tqdm
 
 from megadetector.taxonomy_mapping.taxonomy_graph import (
     build_taxonomy_graph, dag_to_tree, TaxonNode)
+from megadetector.utils import ct_utils
 
 
 #%% Example usage
@@ -105,8 +106,7 @@ def main(desired_label_spec_json_path: str,
     target_to_classifier_labels = map_target_to_classifier(
         target_label_to_nodes, classifier_label_to_nodes)
     os.makedirs(os.path.dirname(output_json_path), exist_ok=True)
-    with open(output_json_path, 'w') as f:
-        json.dump(target_to_classifier_labels, f, indent=1)
+    ct_utils.write_json(output_json_path, target_to_classifier_labels)
 
 
 #%% Support functions
@@ -129,7 +129,7 @@ def map_target_to_classifier(
 
     Returns: dict, maps target label to set of classifier labels
     """
-
+
     remaining_classifier_labels = set(classifier_label_to_nodes.keys())
     target_to_classifier_labels: defaultdict[str, set[str]] = defaultdict(set)
     for target, target_nodes in tqdm(target_label_to_nodes.items()):
@@ -172,7 +172,7 @@ def parse_spec(spec_dict: Mapping[str, Any],
 
     Raises: ValueError, if specification does not match any dataset labels
     """
-
+
     result = set()
     if 'taxa' in spec_dict:
         for taxon in spec_dict['taxa']:
@@ -217,7 +217,7 @@ def label_spec_to_nodes(label_spec_js: dict[str, dict[str, Any]],
     Raises: ValueError, if a classification label specification matches no
         TaxonNode, or if a node is included in two or more classification labels
     """
-
+
     # maps output label name to set of (dataset, dataset_label) tuples
     seen_nodes: set[TaxonNode] = set()
     label_to_nodes: dict[str, set[TaxonNode]] = {}
@@ -239,7 +239,7 @@ def label_spec_to_nodes(label_spec_js: dict[str, dict[str, Any]],
 #%% Command-line driver
 
 def _parse_args() -> argparse.Namespace:
-
+
     parser = argparse.ArgumentParser(
         formatter_class=argparse.ArgumentDefaultsHelpFormatter,
         description='Create mapping from target categories to classifier '
@@ -267,7 +267,7 @@ def _parse_args() -> argparse.Namespace:
 
 
 if __name__ == '__main__':
-
+
     args = _parse_args()
     main(desired_label_spec_json_path=args.desired_label_spec_json,
          classifier_label_spec_json_path=args.classifier_label_spec_json,
megadetector/classification/merge_classification_detection_output.py

@@ -71,6 +71,7 @@ import pandas as pd
 from tqdm import tqdm
 
 from megadetector.utils.ct_utils import round_float
+from megadetector.utils import ct_utils
 
 
 #%% Example usage
@@ -109,7 +110,7 @@ def row_to_classification_list(row: Mapping[str, Any],
     (label_id + 1_000_000, 1.) to the list. If label_pos='first', we put this at
     the front of the list. Otherwise, we put it at the end.
     """
-
+
     contains_label = ('label' in row)
     assert contains_label or contains_preds
     if relative_conf:
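
The label-insertion rule described in this docstring is easiest to see in isolation. A hedged sketch with a hypothetical helper name (not the package's API; the real function also handles relative_conf and the predicted probabilities):

    def insert_label_entry(classifications, label_id, label_pos):
        # ground-truth labels get an ID offset of 1,000,000 and confidence 1.0,
        # so they cannot collide with ordinary predicted category IDs
        entry = (label_id + 1_000_000, 1.)
        if label_pos == 'first':
            classifications.insert(0, entry)  # ground truth at the front
        else:
            classifications.append(entry)  # ground truth at the end
        return classifications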
@@ -176,7 +177,7 @@ def process_queried_images(
     Returns: dict, detections JSON file, except that the 'images' field is a
         dict (img_path => dict) instead of a list
     """
-
+
     # input validation
     assert os.path.exists(queried_images_json_path)
     detection_cache_dir = os.path.join(
@@ -274,7 +275,7 @@ def combine_classification_with_detection(
         label_pos: str | None = None,
         relative_conf: bool = False,
         typical_confidence_threshold: float = None
-        ) -> dict[str, Any]:
+        ) -> dict[str, Any]:
     """
     Adds classification information to a detection JSON. Classification
     information may include the true label and/or the predicted confidences
@@ -302,7 +303,7 @@ def combine_classification_with_detection(
 
     Returns: dict, detections JSON file updated with classification results
     """
-
+
     classification_metadata = {
         'classifier': classifier_name,
         'classification_completion_time': classifier_timestamp
@@ -354,7 +355,7 @@ def main(classification_csv_path: str,
          label_pos: str | None,
          relative_conf: bool,
          typical_confidence_threshold: float) -> None:
-
+
     # input validation
     assert os.path.exists(classification_csv_path)
     assert os.path.exists(label_names_json_path)
@@ -409,8 +410,7 @@ def main(classification_csv_path: str,
         typical_confidence_threshold=typical_confidence_threshold)
 
     os.makedirs(os.path.dirname(output_json_path), exist_ok=True)
-    with open(output_json_path, 'w') as f:
-        json.dump(classification_js, f, indent=1)
+    ct_utils.write_json(output_json_path, classification_js)
 
     print('Wrote merged classification/detection results to {}'.format(output_json_path))
 
@@ -418,7 +418,7 @@ def main(classification_csv_path: str,
 #%% Command-line driver
 
 def _parse_args() -> argparse.Namespace:
-
+
     parser = argparse.ArgumentParser(
         formatter_class=argparse.ArgumentDefaultsHelpFormatter,
         description='Merges classification results with Batch Detection API '
@@ -487,7 +487,7 @@ def _parse_args() -> argparse.Namespace:
 
 
 if __name__ == '__main__':
-
+
     args = _parse_args()
     main(classification_csv_path=args.classification_csv,
          label_names_json_path=args.label_names_json,
megadetector/classification/prepare_classification_script.py

@@ -13,7 +13,7 @@ Differs from prepare_classification_script_mc.py only in the final class mapping
 
 import os
 
-def main():
+def main():  # noqa
     organization_name = 'idfg'
     job_name = 'idfg-2022-01-27-EOE2021S_Group6'
     input_filename = 'idfg-2022-01-27-EOE2021S_Group6_detections.filtered_rde_0.60_0.85_30_0.20.json'
@@ -44,7 +44,7 @@ def main():
 
     for fn in input_files:
         assert os.path.isfile(fn)
-
+
 
     #%% Constants
 
@@ -76,7 +76,7 @@ def main():
     typical_classification_threshold_str = '0.75'
 
     classifier_name = 'idfg4'
-
+
 
     #%% Set up environment
 
@@ -88,18 +88,18 @@ def main():
     #%% Crop images
 
     if include_cropping:
-
+
         commands.append('\n### Cropping ###\n')
-
+
         # fn = input_files[0]
         for fn in input_files:
-
+
             input_file_path = fn
             crop_cmd = ''
-
+
             crop_comment = '\n# Cropping {}\n'.format(fn)
             crop_cmd += crop_comment
-
+
             crop_cmd += "python crop_detections.py \\\n" + \
                 input_file_path + ' \\\n' + \
                 crop_path + ' \\\n' + \
@@ -122,12 +122,12 @@
 
         input_file_path = fn
         classifier_output_path = crop_path + classifier_output_suffix
-
+
         classify_cmd = ''
-
+
         classify_comment = '\n# Classifying {}\n'.format(fn)
         classify_cmd += classify_comment
-
+
         classify_cmd += "python run_classifier.py \\\n" + \
             checkpoint_path + ' \\\n' + \
             crop_path + ' \\\n' + \
@@ -137,14 +137,14 @@
             '--image-size "' + image_size_str + '"' + ' \\\n' + \
             '--batch-size "' + batch_size_str + '"' + ' \\\n' + \
             '--num-workers "' + num_workers_str + '"' + ' \\\n'
-
+
         if device_id is not None:
             classify_cmd += '--device {}'.format(device_id)
-
-        classify_cmd += '\n\n'
+
+        classify_cmd += '\n\n'
         classify_cmd = '{}'.format(classify_cmd)
         commands.append(classify_cmd)
-
+
 
     #%% Merge classification and detection outputs
 
@@ -161,12 +161,12 @@
                              final_output_suffix)
     final_output_path = final_output_path.replace('_detections','')
     final_output_path = final_output_path.replace('_crops','')
-
+
     merge_cmd = ''
-
+
     merge_comment = '\n# Merging {}\n'.format(fn)
     merge_cmd += merge_comment
-
+
     merge_cmd += "python merge_classification_detection_output.py \\\n" + \
         classifier_output_path + ' \\\n' + \
         classifier_categories_path + ' \\\n' + \
@@ -189,6 +189,6 @@ def main():
     import stat
     st = os.stat(output_file)
     os.chmod(output_file, st.st_mode | stat.S_IEXEC)
-
+
 if __name__ == '__main__':
     main()
megadetector/classification/prepare_classification_script_mc.py

@@ -43,7 +43,7 @@ input_files = [
 
 for fn in input_files:
     assert os.path.isfile(fn)
-
+
 
 #%% Constants
 
@@ -76,7 +76,7 @@ classification_threshold_str = '0.05'
 typical_classification_threshold_str = '0.75'
 
 classifier_name = 'megaclassifier_v0.1_efficientnet-b3'
-
+
 
 #%% Set up environment
 
@@ -94,10 +94,10 @@ for fn in input_files:
 
     input_file_path = fn
     crop_cmd = ''
-
+
    crop_comment = '\n# Cropping {}\n'.format(fn)
    crop_cmd += crop_comment
-
+
    crop_cmd += "python crop_detections.py \\\n" + \
        input_file_path + ' \\\n' + \
        crop_path + ' \\\n' + \
@@ -120,12 +120,12 @@ for fn in input_files:
 
    input_file_path = fn
    classifier_output_path = crop_path + classifier_output_suffix
-
+
    classify_cmd = ''
-
+
    classify_comment = '\n# Classifying {}\n'.format(fn)
    classify_cmd += classify_comment
-
+
    classify_cmd += "python run_classifier.py \\\n" + \
        checkpoint_path + ' \\\n' + \
        crop_path + ' \\\n' + \
@@ -135,14 +135,14 @@ for fn in input_files:
        '--image-size "' + image_size_str + '"' + ' \\\n' + \
        '--batch-size "' + batch_size_str + '"' + ' \\\n' + \
        '--num-workers "' + num_workers_str + '"' + ' \\\n'
-
+
    if device_id is not None:
        classify_cmd += '--device {}'.format(device_id)
-
-    classify_cmd += '\n\n'
+
+    classify_cmd += '\n\n'
    classify_cmd = '{}'.format(classify_cmd)
    commands.append(classify_cmd)
-
+
 
 #%% Remap classifier outputs
 
@@ -156,25 +156,25 @@ for fn in input_files:
    classifier_output_path_remapped = \
        classifier_output_path.replace(".csv.gz","_remapped.csv.gz")
    assert not (classifier_output_path == classifier_output_path_remapped)
-
+
    output_label_index = classifier_output_path_remapped.replace(
        "_remapped.csv.gz","_label_index_remapped.json")
-
+
    remap_cmd = ''
-
+
    remap_comment = '\n# Remapping {}\n'.format(fn)
    remap_cmd += remap_comment
-
+
    remap_cmd += "python aggregate_classifier_probs.py \\\n" + \
        classifier_output_path + ' \\\n' + \
        '--target-mapping "' + target_mapping_path + '"' + ' \\\n' + \
        '--output-csv "' + classifier_output_path_remapped + '"' + ' \\\n' + \
        '--output-label-index "' + output_label_index + '"' + ' \\\n' + \
        '\n'
-
+
    remap_cmd = '{}'.format(remap_cmd)
    commands.append(remap_cmd)
-
+
 
 #%% Merge classification and detection outputs
 
@@ -185,25 +185,25 @@ for fn in input_files:
 
    input_file_path = fn
    classifier_output_path = crop_path + classifier_output_suffix
-
+
    classifier_output_path_remapped = \
        classifier_output_path.replace(".csv.gz","_remapped.csv.gz")
-
+
    output_label_index = classifier_output_path_remapped.replace(
        "_remapped.csv.gz","_label_index_remapped.json")
-
+
    final_output_path = os.path.join(output_base,
                                     os.path.basename(classifier_output_path)).\
                                     replace(classifier_output_suffix,
                                             final_output_suffix)
    final_output_path = final_output_path.replace('_detections','')
    final_output_path = final_output_path.replace('_crops','')
-
+
    merge_cmd = ''
-
+
    merge_comment = '\n# Merging {}\n'.format(fn)
    merge_cmd += merge_comment
-
+
    merge_cmd += "python merge_classification_detection_output.py \\\n" + \
        classifier_output_path_remapped + ' \\\n' + \
        output_label_index + ' \\\n' + \
megadetector/classification/run_classifier.py

@@ -103,7 +103,7 @@ def create_loader(cropped_images_dir: str,
         batch_size: int, batch size in dataloader
         num_workers: int, # of workers in dataloader
     """
-
+
     crop_files = []
 
     if detections_json_path is None:
@@ -160,7 +160,7 @@ def main(model_path: str,
          batch_size: int,
          num_workers: int,
          device_id: int | None = None) -> None:
-
+
     # Evaluating with accimage is much faster than Pillow or Pillow-SIMD, but accimage
     # is Linux-only.
     try:
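
The accimage comment refers to torchvision's switchable image backend; the try block truncated in this hunk presumably guards the switch roughly like this sketch (an assumption, not the file's exact code):

    import torchvision

    try:
        import accimage  # noqa: F401  (imported only to test availability)
        torchvision.set_image_backend('accimage')  # fast JPEG decoding; Linux-only
    except ImportError:
        pass  # fall back to torchvision's default Pillow backend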
@@ -207,7 +207,7 @@ def test_epoch(model: torch.nn.Module,
         label_names: optional list of str, label names
         output_csv_path: str
     """
-
+
     # set dropout and BN layers to eval mode
     model.eval()
 
@@ -274,7 +274,7 @@ def _parse_args() -> argparse.Namespace:
 
 
 if __name__ == '__main__':
-
+
     args = _parse_args()
     main(model_path=args.model,
          cropped_images_dir=args.crops_dir,