megadetector 5.0.27__py3-none-any.whl → 5.0.29__py3-none-any.whl

This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two package versions as they appear in their public registry.

Potentially problematic release: this version of megadetector might be problematic.

Files changed (176)
  1. megadetector/api/batch_processing/api_core/batch_service/score.py +4 -5
  2. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +1 -1
  3. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +1 -1
  4. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
  5. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
  6. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
  7. megadetector/api/synchronous/api_core/tests/load_test.py +2 -3
  8. megadetector/classification/aggregate_classifier_probs.py +3 -3
  9. megadetector/classification/analyze_failed_images.py +5 -5
  10. megadetector/classification/cache_batchapi_outputs.py +5 -5
  11. megadetector/classification/create_classification_dataset.py +11 -12
  12. megadetector/classification/crop_detections.py +10 -10
  13. megadetector/classification/csv_to_json.py +8 -8
  14. megadetector/classification/detect_and_crop.py +13 -15
  15. megadetector/classification/evaluate_model.py +7 -7
  16. megadetector/classification/identify_mislabeled_candidates.py +6 -6
  17. megadetector/classification/json_to_azcopy_list.py +1 -1
  18. megadetector/classification/json_validator.py +29 -32
  19. megadetector/classification/map_classification_categories.py +9 -9
  20. megadetector/classification/merge_classification_detection_output.py +12 -9
  21. megadetector/classification/prepare_classification_script.py +19 -19
  22. megadetector/classification/prepare_classification_script_mc.py +23 -23
  23. megadetector/classification/run_classifier.py +4 -4
  24. megadetector/classification/save_mislabeled.py +6 -6
  25. megadetector/classification/train_classifier.py +1 -1
  26. megadetector/classification/train_classifier_tf.py +9 -9
  27. megadetector/classification/train_utils.py +10 -10
  28. megadetector/data_management/annotations/annotation_constants.py +1 -1
  29. megadetector/data_management/camtrap_dp_to_coco.py +45 -45
  30. megadetector/data_management/cct_json_utils.py +101 -101
  31. megadetector/data_management/cct_to_md.py +49 -49
  32. megadetector/data_management/cct_to_wi.py +33 -33
  33. megadetector/data_management/coco_to_labelme.py +75 -75
  34. megadetector/data_management/coco_to_yolo.py +189 -189
  35. megadetector/data_management/databases/add_width_and_height_to_db.py +3 -2
  36. megadetector/data_management/databases/combine_coco_camera_traps_files.py +38 -38
  37. megadetector/data_management/databases/integrity_check_json_db.py +202 -188
  38. megadetector/data_management/databases/subset_json_db.py +33 -33
  39. megadetector/data_management/generate_crops_from_cct.py +38 -38
  40. megadetector/data_management/get_image_sizes.py +54 -49
  41. megadetector/data_management/labelme_to_coco.py +130 -124
  42. megadetector/data_management/labelme_to_yolo.py +78 -72
  43. megadetector/data_management/lila/create_lila_blank_set.py +81 -83
  44. megadetector/data_management/lila/create_lila_test_set.py +32 -31
  45. megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
  46. megadetector/data_management/lila/download_lila_subset.py +21 -24
  47. megadetector/data_management/lila/generate_lila_per_image_labels.py +91 -91
  48. megadetector/data_management/lila/get_lila_annotation_counts.py +30 -30
  49. megadetector/data_management/lila/get_lila_image_counts.py +22 -22
  50. megadetector/data_management/lila/lila_common.py +70 -70
  51. megadetector/data_management/lila/test_lila_metadata_urls.py +13 -14
  52. megadetector/data_management/mewc_to_md.py +339 -340
  53. megadetector/data_management/ocr_tools.py +258 -252
  54. megadetector/data_management/read_exif.py +232 -223
  55. megadetector/data_management/remap_coco_categories.py +26 -26
  56. megadetector/data_management/remove_exif.py +31 -20
  57. megadetector/data_management/rename_images.py +187 -187
  58. megadetector/data_management/resize_coco_dataset.py +41 -41
  59. megadetector/data_management/speciesnet_to_md.py +41 -41
  60. megadetector/data_management/wi_download_csv_to_coco.py +55 -55
  61. megadetector/data_management/yolo_output_to_md_output.py +117 -120
  62. megadetector/data_management/yolo_to_coco.py +195 -188
  63. megadetector/detection/change_detection.py +831 -0
  64. megadetector/detection/process_video.py +341 -338
  65. megadetector/detection/pytorch_detector.py +308 -266
  66. megadetector/detection/run_detector.py +186 -166
  67. megadetector/detection/run_detector_batch.py +366 -364
  68. megadetector/detection/run_inference_with_yolov5_val.py +328 -325
  69. megadetector/detection/run_tiled_inference.py +312 -253
  70. megadetector/detection/tf_detector.py +24 -24
  71. megadetector/detection/video_utils.py +291 -283
  72. megadetector/postprocessing/add_max_conf.py +15 -11
  73. megadetector/postprocessing/categorize_detections_by_size.py +44 -44
  74. megadetector/postprocessing/classification_postprocessing.py +808 -311
  75. megadetector/postprocessing/combine_batch_outputs.py +20 -21
  76. megadetector/postprocessing/compare_batch_results.py +528 -517
  77. megadetector/postprocessing/convert_output_format.py +97 -97
  78. megadetector/postprocessing/create_crop_folder.py +220 -147
  79. megadetector/postprocessing/detector_calibration.py +173 -168
  80. megadetector/postprocessing/generate_csv_report.py +508 -0
  81. megadetector/postprocessing/load_api_results.py +25 -22
  82. megadetector/postprocessing/md_to_coco.py +129 -98
  83. megadetector/postprocessing/md_to_labelme.py +89 -83
  84. megadetector/postprocessing/md_to_wi.py +40 -40
  85. megadetector/postprocessing/merge_detections.py +87 -114
  86. megadetector/postprocessing/postprocess_batch_results.py +319 -302
  87. megadetector/postprocessing/remap_detection_categories.py +36 -36
  88. megadetector/postprocessing/render_detection_confusion_matrix.py +205 -199
  89. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
  90. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
  91. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +702 -677
  92. megadetector/postprocessing/separate_detections_into_folders.py +226 -211
  93. megadetector/postprocessing/subset_json_detector_output.py +265 -262
  94. megadetector/postprocessing/top_folders_to_bottom.py +45 -45
  95. megadetector/postprocessing/validate_batch_results.py +70 -70
  96. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
  97. megadetector/taxonomy_mapping/map_new_lila_datasets.py +15 -15
  98. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +14 -14
  99. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +66 -69
  100. megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
  101. megadetector/taxonomy_mapping/simple_image_download.py +8 -8
  102. megadetector/taxonomy_mapping/species_lookup.py +33 -33
  103. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
  104. megadetector/taxonomy_mapping/taxonomy_graph.py +11 -11
  105. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
  106. megadetector/utils/azure_utils.py +22 -22
  107. megadetector/utils/ct_utils.py +1019 -200
  108. megadetector/utils/directory_listing.py +21 -77
  109. megadetector/utils/gpu_test.py +22 -22
  110. megadetector/utils/md_tests.py +541 -518
  111. megadetector/utils/path_utils.py +1511 -406
  112. megadetector/utils/process_utils.py +41 -41
  113. megadetector/utils/sas_blob_utils.py +53 -49
  114. megadetector/utils/split_locations_into_train_val.py +73 -60
  115. megadetector/utils/string_utils.py +147 -26
  116. megadetector/utils/url_utils.py +463 -173
  117. megadetector/utils/wi_utils.py +2629 -2868
  118. megadetector/utils/write_html_image_list.py +137 -137
  119. megadetector/visualization/plot_utils.py +21 -21
  120. megadetector/visualization/render_images_with_thumbnails.py +37 -73
  121. megadetector/visualization/visualization_utils.py +424 -404
  122. megadetector/visualization/visualize_db.py +197 -190
  123. megadetector/visualization/visualize_detector_output.py +126 -98
  124. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/METADATA +6 -3
  125. megadetector-5.0.29.dist-info/RECORD +163 -0
  126. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/WHEEL +1 -1
  127. megadetector/data_management/importers/add_nacti_sizes.py +0 -52
  128. megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
  129. megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
  130. megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
  131. megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
  132. megadetector/data_management/importers/awc_to_json.py +0 -191
  133. megadetector/data_management/importers/bellevue_to_json.py +0 -272
  134. megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
  135. megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
  136. megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
  137. megadetector/data_management/importers/cct_field_adjustments.py +0 -58
  138. megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
  139. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  140. megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
  141. megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
  142. megadetector/data_management/importers/ena24_to_json.py +0 -276
  143. megadetector/data_management/importers/filenames_to_json.py +0 -386
  144. megadetector/data_management/importers/helena_to_cct.py +0 -283
  145. megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
  146. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  147. megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
  148. megadetector/data_management/importers/jb_csv_to_json.py +0 -150
  149. megadetector/data_management/importers/mcgill_to_json.py +0 -250
  150. megadetector/data_management/importers/missouri_to_json.py +0 -490
  151. megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
  152. megadetector/data_management/importers/noaa_seals_2019.py +0 -181
  153. megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
  154. megadetector/data_management/importers/pc_to_json.py +0 -365
  155. megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
  156. megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
  157. megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
  158. megadetector/data_management/importers/rspb_to_json.py +0 -356
  159. megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
  160. megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
  161. megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
  162. megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
  163. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  164. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  165. megadetector/data_management/importers/sulross_get_exif.py +0 -65
  166. megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
  167. megadetector/data_management/importers/ubc_to_json.py +0 -399
  168. megadetector/data_management/importers/umn_to_json.py +0 -507
  169. megadetector/data_management/importers/wellington_to_json.py +0 -263
  170. megadetector/data_management/importers/wi_to_json.py +0 -442
  171. megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
  172. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
  173. megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
  174. megadetector-5.0.27.dist-info/RECORD +0 -208
  175. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/licenses/LICENSE +0 -0
  176. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/top_level.txt +0 -0
@@ -114,7 +114,7 @@ def initialize_taxonomy_lookup(force_init=False) -> None:
     if (not force_init) and (inat_taxonomy is not None):
         print('Skipping taxonomy re-init')
         return
-
+
     if (not force_init) and (os.path.isfile(serialized_structures_file)):
 
         print(f'De-serializing taxonomy data from {serialized_structures_file}')
@@ -135,7 +135,7 @@ def initialize_taxonomy_lookup(force_init=False) -> None:
         gbif_vernacular_to_taxon_id,\
         gbif_taxon_id_to_scientific,\
         gbif_scientific_to_taxon_id = structures_to_serialize
-
+
         return
 
 
@@ -146,7 +146,7 @@ def initialize_taxonomy_lookup(force_init=False) -> None:
     for taxonomy_name, zip_url in taxonomy_urls.items():
 
         need_to_download = False
-
+
         if force_init:
             need_to_download = True
 
@@ -267,7 +267,7 @@ def initialize_taxonomy_lookup(force_init=False) -> None:
     # Build iNat dictionaries
 
     print('Building lookup dictionaries for iNat taxonomy')
-
+
     for i_row, row in tqdm(inat_taxonomy.iterrows(), total=len(inat_taxonomy)):
 
         taxon_id = row['taxonID']
@@ -286,7 +286,7 @@ def initialize_taxonomy_lookup(force_init=False) -> None:
     # Build GBIF dictionaries
 
     print('Building lookup dictionaries for GBIF taxonomy')
-
+
     for i_row, row in tqdm(gbif_taxonomy.iterrows(), total=len(gbif_taxonomy)):
 
         taxon_id = row['taxonID']
@@ -596,21 +596,21 @@ class TaxonomicMatch:
 
 
 hyphenated_terms = ['crowned', 'backed', 'throated', 'tailed', 'headed', 'cheeked',
-                    'ruffed', 'browed', 'eating', 'striped', 'shanked',
+                    'ruffed', 'browed', 'eating', 'striped', 'shanked',
                     'fronted', 'bellied', 'spotted', 'eared', 'collared', 'breasted',
                     'necked']
 
 def get_preferred_taxonomic_match(query: str, taxonomy_preference = 'inat', retry=True) -> TaxonomicMatch:
     """
-    Wrapper for _get_preferred_taxonomic_match, but expressing a variety of heuristics
+    Wrapper for _get_preferred_taxonomic_match, but expressing a variety of heuristics
     and preferences that are specific to our scenario.
-
+
     Args:
         query (str): The common or scientific name we want to look up
        taxonomy_preference (str, optional): 'inat' or 'gbif'
-        retry (bool, optional): if the initial lookup fails, should we try heuristic
+        retry (bool, optional): if the initial lookup fails, should we try heuristic
            substitutions, e.g. replacing "_" with " ", or "spp" with "species"?
-
+
     Returns:
         TaxonomicMatch: the best taxonomic match, or None
     """
@@ -618,31 +618,31 @@ def get_preferred_taxonomic_match(query: str, taxonomy_preference = 'inat', retr
     m,query = _get_preferred_taxonomic_match(query=query,taxonomy_preference=taxonomy_preference)
     if (len(m.scientific_name) > 0) or (not retry):
         return m
-
+
     for s in hyphenated_terms:
         query = query.replace(' ' + s,'-' + s)
     m,query = _get_preferred_taxonomic_match(query=query,taxonomy_preference=taxonomy_preference)
     return m
-
-
+
+
 def validate_and_convert(data):
     """
     Recursively validates that all elements in the nested structure are only
     tuples, lists, ints, or np.int64, and converts np.int64 to int.
-
+
     Args:
         data: The nested structure to validate and convert
-
+
     Returns:
         The validated and converted structure
-
+
     Raises:
         TypeError: If an invalid type is encountered
     """
-
-    if isinstance(data, np.int64):
+
+    if isinstance(data, np.int64):
         return int(data)
-    elif isinstance(data, int) or isinstance(data, str):
+    elif isinstance(data, int) or isinstance(data, str):
         return data
     elif isinstance(data, (list, tuple)):
         # Process lists and tuples recursively
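
A small sketch (not from the package) of the conversion described in the docstring above: nested lists and tuples are traversed, and np.int64 leaves become plain Python ints. It assumes numpy is installed and that validate_and_convert is importable from the same species_lookup module.

import numpy as np
from megadetector.taxonomy_mapping.species_lookup import validate_and_convert

nested = [('mammalia', np.int64(372)), [np.int64(1), 2, 'felidae']]
converted = validate_and_convert(nested)
print(converted)  # np.int64 leaves are now plain ints; ints, strs, lists, and tuples pass through
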
@@ -654,17 +654,17 @@ def validate_and_convert(data):
 
 # ...def validate_and_convert(...)
 
-
+
 def _get_preferred_taxonomic_match(query: str, taxonomy_preference = 'inat') -> TaxonomicMatch:
-
+
     query = query.lower().strip().replace('_', ' ')
     query = query.replace('unidentified','')
     query = query.replace('unknown','')
     if query.endswith(' sp'):
         query = query.replace(' sp','')
     if query.endswith(' group'):
-        query = query.replace(' group','')
-
+        query = query.replace(' group','')
+
     query = query.strip()
 
     # query = 'person'
@@ -686,17 +686,17 @@ def _get_preferred_taxonomic_match(query: str, taxonomy_preference = 'inat') ->
 
     n_inat_matches = len(inat_matches)
     n_gbif_matches = len(gbif_matches)
-
+
     selected_matches = None
-
+
     assert taxonomy_preference in ['gbif','inat'],\
         'Unrecognized taxonomy preference: {}'.format(taxonomy_preference)
-
+
     if n_inat_matches > 0 and taxonomy_preference == 'inat':
         selected_matches = 'inat'
     elif n_gbif_matches > 0:
         selected_matches = 'gbif'
-
+
     if selected_matches == 'inat':
 
         i_match = 0
@@ -802,7 +802,7 @@ def _get_preferred_taxonomic_match(query: str, taxonomy_preference = 'inat') ->
     # Convert np.int64's to ints
     if match is not None:
         match = validate_and_convert(match)
-
+
     taxonomy_string = str(match)
 
     return TaxonomicMatch(scientific_name, common_name, taxonomic_level, source,
@@ -828,15 +828,15 @@ if False:
     # print(matches)
 
     print_taxonomy_matches(matches,verbose=True)
-
+
     print('\n\n')
-
+
     # Print the taxonomy in the taxonomy spreadsheet format
     assert matches[1]['source'] == 'inat'
     t = str(matches[1]['taxonomy'])
     print(t)
     import clipboard; clipboard.copy(t)
-
+
 
     #%% Directly access the taxonomy tables
 
@@ -848,12 +848,12 @@ if False:
 
 #%% Command-line driver
 
-def main():
+def main(): # noqa
 
     # Read command line inputs (absolute path)
     parser = argparse.ArgumentParser()
     parser.add_argument('input_file')
-
+
     if len(sys.argv[1:]) == 0:
         parser.print_help()
         parser.exit()
@@ -36,7 +36,7 @@ def check_taxonomy_csv(csv_path: str) -> None:
     """
     See module docstring.
     """
-
+
     taxonomy_df = pd.read_csv(csv_path)
 
     graph = nx.DiGraph()
@@ -46,12 +46,12 @@ def check_taxonomy_csv(csv_path: str) -> None:
     num_scientific_name_errors = 0
 
     for i_row, row in taxonomy_df.iterrows():
-
+
         ds = row['dataset_name']
         ds_label = row['query']
         scientific_name = row['scientific_name']
         level = row['taxonomy_level']
-
+
         # This used to represent the source of the mapping: iNat, gbif, or manual. We've
         # stopped tracking this, so this is now vestigial.
         id_source = 0 # row['source']
@@ -95,8 +95,8 @@ def check_taxonomy_csv(csv_path: str) -> None:
             num_scientific_name_errors += 1
 
         taxon_child = node
-
-    # ...for each row in the taxonomy file
+
+    # ...for each row in the taxonomy file
 
     assert nx.is_directed_acyclic_graph(graph)
 
@@ -124,36 +124,36 @@ def check_taxonomy_csv(csv_path: str) -> None:
         print(f'At least one node has unresolved ambiguous parents: {e}')
 
     print('Processed {} rows from {}'.format(len(taxonomy_df),csv_path))
-
+
     print('num taxon level errors:', num_taxon_level_errors)
     print('num scientific name errors:', num_scientific_name_errors)
 
 
 #%% Command-line driver
-
+
 if __name__ == '__main__':
-
+
     parser = argparse.ArgumentParser()
     parser.add_argument(
         'taxonomy_csv_path',
         help='path to taxonomy CSV file')
-
+
     if len(sys.argv[1:]) == 0:
         parser.print_help()
         parser.exit()
-
+
     args = parser.parse_args()
 
     check_taxonomy_csv(args.taxonomy_csv_path)
 
 
 #%% Interactive driver
-
+
 if False:
-
+
     #%%
-
+
     import os
     csv_path = os.path.expanduser('~/lila/lila-taxonomy-mapping_release.csv')
     check_taxonomy_csv(csv_path)
-
+
@@ -1,4 +1,4 @@
-"""
+r"""
 
 taxonomy_graph.py
 
@@ -69,7 +69,7 @@ class TaxonNode:
     By default, we support multiple parents for each TaxonNode. See discussion
     in module docstring above.
     """
-
+
     # class variables
     single_parent_only: ClassVar[bool] = False
 
@@ -82,7 +82,7 @@ class TaxonNode:
 
     def __init__(self, level: str, name: str,
                  graph: Optional[nx.DiGraph] = None):
-
+
         self.level = level
         self.name = name
         self.graph = graph
@@ -131,7 +131,7 @@ class TaxonNode:
         Args:
             parent: TaxonNode, must be higher in the taxonomical hierarchy
         """
-
+
         assert self.graph is not None
         parents = self.parents
         if TaxonNode.single_parent_only and len(parents) > 0:
@@ -150,7 +150,7 @@ class TaxonNode:
         Args:
             child: TaxonNode, must be lower in the taxonomical hierarchy
         """
-
+
         assert self.graph is not None
         self.graph.add_edge(self, child)
 
@@ -160,7 +160,7 @@ class TaxonNode:
             ds: str, name of dataset
             ds_label: str, name of label used by that dataset
         """
-
+
         self.dataset_labels.add((ds, ds_label))
 
     def get_dataset_labels(self,
@@ -176,7 +176,7 @@ class TaxonNode:
 
         Returns: set of (ds, ds_label) tuples
         """
-
+
         result = self.dataset_labels
         if include_datasets is not None:
             result = set(tup for tup in result if tup[0] in include_datasets)
@@ -199,7 +199,7 @@ class TaxonNode:
 
         Returns: TaxonNode, the LCA if it exists, or None if no LCA exists
         """
-
+
         paths = []
         for node in nodes:
             # get path to root
@@ -242,7 +242,7 @@ def build_taxonomy_graph(taxonomy_df: pd.DataFrame
             TaxonNode node in the tree that contains the label,
             keys are all lowercase
     """
-
+
     graph = nx.DiGraph()
     taxon_to_node = {} # maps (taxon_level, taxon_name) to a TaxonNode
     label_to_node = {} # maps (dataset_name, dataset_label) to a TaxonNode
@@ -303,12 +303,12 @@ def dag_to_tree(graph: nx.DiGraph,
     component separately.
 
     Args:
-        graph: nx.DiGraph, DAG representation of taxonomy hieararchy
+        graph: nx.DiGraph, DAG representation of taxonomy hierarchy
         taxon_to_node: dict, maps (taxon_level, taxon_name) to a TaxonNode
 
     Returns: nx.DiGraph, a tree-structured graph
     """
-
+
     tree = nx.DiGraph()
     for node in graph.nodes:
         tree.add_node(node)
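
For orientation, a hedged sketch of the TaxonNode API these hunks touch. Only __init__(level, name, graph) and get_dataset_labels are fully visible above; the add_child name is inferred from the "child: TaxonNode, must be lower in the taxonomical hierarchy" docstring and the self.graph.add_edge(self, child) body shown here, so treat it as an assumption. The node names are made up.

import networkx as nx
from megadetector.taxonomy_mapping.taxonomy_graph import TaxonNode

graph = nx.DiGraph()
family = TaxonNode(level='family', name='felidae', graph=graph)
genus = TaxonNode(level='genus', name='lynx', graph=graph)

# Inferred method name (see note above): wires a parent -> child edge into the shared graph
family.add_child(genus)

print(list(graph.edges()))         # one edge, from the family node to the genus node
print(genus.get_dataset_labels())  # (dataset, label) pairs attached to this node, if any
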
@@ -17,9 +17,9 @@ from megadetector.data_management.lila.lila_common import read_lila_taxonomy_map
 #%% Prevent execution during infrastructural imports
 
 if False:
-
+
     #%% Constants
-
+
     lila_local_base = os.path.expanduser('~/lila')
 
     metadata_dir = os.path.join(lila_local_base,'metadata')
@@ -30,7 +30,7 @@ if False:
     lila_dataset_to_categories_file = os.path.join(category_list_dir,'lila_dataset_to_categories.json')
 
     assert os.path.isfile(lila_dataset_to_categories_file)
-
+
 
     #%% Load category and taxonomy files
 
@@ -48,36 +48,36 @@ if False:
 
     # i_row = 1; row = taxonomy_df.iloc[i_row]; row
     for i_row,row in taxonomy_df.iterrows():
-
+
         ds_query = row['dataset_name'] + ':' + row['query']
         ds_query = ds_query.lower()
-
+
         if not isinstance(row['scientific_name'],str):
             unmapped_queries.add(ds_query)
             ds_query_to_scientific_name[ds_query] = 'unmapped'
             continue
-
+
         ds_query_to_scientific_name[ds_query] = row['scientific_name']
-
-
+
+
     #%% For each dataset, make sure we can map every category to the taxonomy
 
     # dataset_name = list(lila_dataset_to_categories.keys())[0]
     for _dataset_name in lila_dataset_to_categories.keys():
-
+
         if '_bbox' in _dataset_name:
             dataset_name = _dataset_name.replace('_bbox','')
         else:
             dataset_name = _dataset_name
-
+
         categories = lila_dataset_to_categories[dataset_name]
-
+
         # c = categories[0]
         for c in categories:
             ds_query = dataset_name + ':' + c['name']
             ds_query = ds_query.lower()
-
+
             if ds_query not in ds_query_to_scientific_name:
-                print('Could not find mapping for {}'.format(ds_query))
+                print('Could not find mapping for {}'.format(ds_query))
             else:
                 scientific_name = ds_query_to_scientific_name[ds_query]
@@ -12,8 +12,8 @@ Requires azure-storage-blob>=12.4.0
 
 import json
 
-from typing import Any, Iterable, List, Optional, Tuple, Union
-from azure.storage.blob import BlobPrefix, ContainerClient
+from typing import Any, Iterable, Optional, Union
+from azure.storage.blob import BlobPrefix, ContainerClient # type: ignore
 
 from megadetector.utils import path_utils
 from megadetector.utils import sas_blob_utils
@@ -26,20 +26,20 @@ def walk_container(container_client: ContainerClient,
                    prefix: str = '',
                    store_folders: bool = True,
                    store_blobs: bool = True,
-                   debug_max_items: int = -1) -> Tuple[List[str], List[str]]:
+                   debug_max_items: int = -1) -> tuple[list[str], list[str]]:
     """
     Recursively walk folders a Azure Blob Storage container.
 
     Based on:
     https://github.com/Azure/azure-sdk-for-python/blob/master/sdk/storage/azure-storage-blob/samples/blob_samples_walk_blob_hierarchy.py
     """
-
+
     depth = 1
 
     def walk_blob_hierarchy(prefix: str,
-                            folders: Optional[List[str]] = None,
-                            blobs: Optional[List[str]] = None
-                            ) -> Tuple[List[str], List[str]]:
+                            folders: Optional[list[str]] = None,
+                            blobs: Optional[list[str]] = None
+                            ) -> tuple[list[str], list[str]]:
         if folders is None:
             folders = []
         if blobs is None:
@@ -76,11 +76,11 @@ def walk_container(container_client: ContainerClient,
     return folders, blobs
 
 
-def list_top_level_blob_folders(container_client: ContainerClient) -> List[str]:
+def list_top_level_blob_folders(container_client: ContainerClient) -> list[str]:
     """
     List all top-level folders in a container.
     """
-
+
     top_level_folders, _ = walk_container(
         container_client, max_depth=1, store_blobs=False)
     return top_level_folders
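
A hedged usage sketch for the helper above; the container URL is a placeholder, and it assumes azure-storage-blob>=12.4.0 (per the module docstring) with a SAS token that grants list permission.

from azure.storage.blob import ContainerClient
from megadetector.utils.azure_utils import list_top_level_blob_folders

container_client = ContainerClient.from_container_url(
    'https://myaccount.blob.core.windows.net/mycontainer?sv=...')  # placeholder URL/SAS
print(list_top_level_blob_folders(container_client))
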
@@ -88,13 +88,13 @@ def list_top_level_blob_folders(container_client: ContainerClient) -> List[str]:
 
 def concatenate_json_lists(input_files: Iterable[str],
                            output_file: Optional[str] = None
-                           ) -> List[Any]:
+                           ) -> list[Any]:
     """
     Given a list of JSON files that contain lists (typically string
     filenames), concatenates the lists into a single list and optionally
     writes out this list to a new output JSON file.
     """
-
+
     output_list = []
     for fn in input_files:
         with open(fn, 'r') as f:
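
A short sketch of the concatenation helper above, assuming each input file contains a JSON list; the filenames are placeholders.

from megadetector.utils.azure_utils import concatenate_json_lists

combined = concatenate_json_lists(['chunk_00.json', 'chunk_01.json'],
                                  output_file='all_files.json')  # also writes the merged list
print(len(combined))
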
@@ -116,12 +116,12 @@ def upload_file_to_blob(account_name: str,
     Uploads a local file to Azure Blob Storage and returns the uploaded
     blob URI with SAS token.
     """
-
+
     container_uri = sas_blob_utils.build_azure_storage_uri(
         account=account_name, container=container_name, sas_token=sas_token)
     with open(local_path, 'rb') as data:
         return sas_blob_utils.upload_blob(
-            container_uri=container_uri, blob_name=blob_name, data=data,
+            container_uri=container_uri, blob_name=blob_name, data=data,
             overwrite=overwrite)
 
 
@@ -131,11 +131,11 @@ def enumerate_blobs_to_file(
         container_name: str,
         sas_token: Optional[str] = None,
         blob_prefix: Optional[str] = None,
-        blob_suffix: Optional[Union[str, Tuple[str]]] = None,
+        blob_suffix: Optional[Union[str, tuple[str]]] = None,
         rsearch: Optional[str] = None,
         limit: Optional[int] = None,
         verbose: Optional[bool] = True
-        ) -> List[str]:
+        ) -> list[str]:
     """
     Enumerates blobs in a container, and writes the blob names to an output
     file.
@@ -143,7 +143,7 @@ def enumerate_blobs_to_file(
     Args:
         output_file: str, path to save list of files in container
             If ends in '.json', writes a JSON string. Otherwise, writes a
-            newline-delimited list. Can be None, in which case this is just a
+            newline-delimited list. Can be None, in which case this is just a
            convenient wrapper for blob enumeration.
         account_name: str, Azure Storage account name
         container_name: str, Azure Blob Storage container name
@@ -155,24 +155,24 @@ def enumerate_blobs_to_file(
             be lowercased first before comparing with the suffix(es).
         rsearch: optional str, returned results will only contain blob names
             that match this regex. Can also be a list of regexes, in which case
-            blobs matching *any* of the regex's will be returned.
+            blobs matching *any* of the regex's will be returned.
         limit: int, maximum # of blob names to list
             if None, then returns all blob names
 
     Returns: list of str, sorted blob names, of length limit or shorter.
     """
-
+
     if sas_token is not None and len(sas_token) > 9 and sas_token[0] == '?':
         sas_token = sas_token[1:]
-
+
     container_uri = sas_blob_utils.build_azure_storage_uri(
         account=account_name, container=container_name, sas_token=sas_token)
-
+
     matched_blobs = sas_blob_utils.list_blobs_in_container(
         container_uri=container_uri, blob_prefix=blob_prefix,
         blob_suffix=blob_suffix, rsearch=rsearch, limit=limit, verbose=verbose)
-
+
     if output_file is not None:
         path_utils.write_list_to_file(output_file, matched_blobs)
-
+
     return matched_blobs
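
A hedged usage sketch for enumerate_blobs_to_file based on the parameters documented above; the account, container, and prefix values are placeholders, not real resources.

from megadetector.utils.azure_utils import enumerate_blobs_to_file

blob_names = enumerate_blobs_to_file(
    output_file='blobs.json',       # '.json' extension -> JSON output, per the docstring
    account_name='myaccount',
    container_name='mycontainer',
    sas_token=None,                 # assumes a publicly-listable container for this sketch
    blob_prefix='images/2024/',
    blob_suffix='.jpg',
    limit=1000)
print('{} blobs listed'.format(len(blob_names)))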