megadetector 5.0.9__py3-none-any.whl → 5.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic; see the registry's advisory page for more details.

Files changed (226)
  1. {megadetector-5.0.9.dist-info → megadetector-5.0.11.dist-info}/LICENSE +0 -0
  2. {megadetector-5.0.9.dist-info → megadetector-5.0.11.dist-info}/METADATA +12 -11
  3. megadetector-5.0.11.dist-info/RECORD +5 -0
  4. megadetector-5.0.11.dist-info/top_level.txt +1 -0
  5. api/__init__.py +0 -0
  6. api/batch_processing/__init__.py +0 -0
  7. api/batch_processing/api_core/__init__.py +0 -0
  8. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  9. api/batch_processing/api_core/batch_service/score.py +0 -439
  10. api/batch_processing/api_core/server.py +0 -294
  11. api/batch_processing/api_core/server_api_config.py +0 -98
  12. api/batch_processing/api_core/server_app_config.py +0 -55
  13. api/batch_processing/api_core/server_batch_job_manager.py +0 -220
  14. api/batch_processing/api_core/server_job_status_table.py +0 -152
  15. api/batch_processing/api_core/server_orchestration.py +0 -360
  16. api/batch_processing/api_core/server_utils.py +0 -92
  17. api/batch_processing/api_core_support/__init__.py +0 -0
  18. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
  19. api/batch_processing/api_support/__init__.py +0 -0
  20. api/batch_processing/api_support/summarize_daily_activity.py +0 -152
  21. api/batch_processing/data_preparation/__init__.py +0 -0
  22. api/batch_processing/data_preparation/manage_local_batch.py +0 -2391
  23. api/batch_processing/data_preparation/manage_video_batch.py +0 -327
  24. api/batch_processing/integration/digiKam/setup.py +0 -6
  25. api/batch_processing/integration/digiKam/xmp_integration.py +0 -465
  26. api/batch_processing/integration/eMammal/test_scripts/config_template.py +0 -5
  27. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -126
  28. api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +0 -55
  29. api/batch_processing/postprocessing/__init__.py +0 -0
  30. api/batch_processing/postprocessing/add_max_conf.py +0 -64
  31. api/batch_processing/postprocessing/categorize_detections_by_size.py +0 -163
  32. api/batch_processing/postprocessing/combine_api_outputs.py +0 -249
  33. api/batch_processing/postprocessing/compare_batch_results.py +0 -958
  34. api/batch_processing/postprocessing/convert_output_format.py +0 -397
  35. api/batch_processing/postprocessing/load_api_results.py +0 -195
  36. api/batch_processing/postprocessing/md_to_coco.py +0 -310
  37. api/batch_processing/postprocessing/md_to_labelme.py +0 -330
  38. api/batch_processing/postprocessing/merge_detections.py +0 -401
  39. api/batch_processing/postprocessing/postprocess_batch_results.py +0 -1904
  40. api/batch_processing/postprocessing/remap_detection_categories.py +0 -170
  41. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +0 -661
  42. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +0 -211
  43. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +0 -82
  44. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +0 -1631
  45. api/batch_processing/postprocessing/separate_detections_into_folders.py +0 -731
  46. api/batch_processing/postprocessing/subset_json_detector_output.py +0 -696
  47. api/batch_processing/postprocessing/top_folders_to_bottom.py +0 -223
  48. api/synchronous/__init__.py +0 -0
  49. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  50. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -152
  51. api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -266
  52. api/synchronous/api_core/animal_detection_api/config.py +0 -35
  53. api/synchronous/api_core/animal_detection_api/data_management/annotations/annotation_constants.py +0 -47
  54. api/synchronous/api_core/animal_detection_api/detection/detector_training/copy_checkpoints.py +0 -43
  55. api/synchronous/api_core/animal_detection_api/detection/detector_training/model_main_tf2.py +0 -114
  56. api/synchronous/api_core/animal_detection_api/detection/process_video.py +0 -543
  57. api/synchronous/api_core/animal_detection_api/detection/pytorch_detector.py +0 -304
  58. api/synchronous/api_core/animal_detection_api/detection/run_detector.py +0 -627
  59. api/synchronous/api_core/animal_detection_api/detection/run_detector_batch.py +0 -1029
  60. api/synchronous/api_core/animal_detection_api/detection/run_inference_with_yolov5_val.py +0 -581
  61. api/synchronous/api_core/animal_detection_api/detection/run_tiled_inference.py +0 -754
  62. api/synchronous/api_core/animal_detection_api/detection/tf_detector.py +0 -165
  63. api/synchronous/api_core/animal_detection_api/detection/video_utils.py +0 -495
  64. api/synchronous/api_core/animal_detection_api/md_utils/azure_utils.py +0 -174
  65. api/synchronous/api_core/animal_detection_api/md_utils/ct_utils.py +0 -262
  66. api/synchronous/api_core/animal_detection_api/md_utils/directory_listing.py +0 -251
  67. api/synchronous/api_core/animal_detection_api/md_utils/matlab_porting_tools.py +0 -97
  68. api/synchronous/api_core/animal_detection_api/md_utils/path_utils.py +0 -416
  69. api/synchronous/api_core/animal_detection_api/md_utils/process_utils.py +0 -110
  70. api/synchronous/api_core/animal_detection_api/md_utils/sas_blob_utils.py +0 -509
  71. api/synchronous/api_core/animal_detection_api/md_utils/string_utils.py +0 -59
  72. api/synchronous/api_core/animal_detection_api/md_utils/url_utils.py +0 -144
  73. api/synchronous/api_core/animal_detection_api/md_utils/write_html_image_list.py +0 -226
  74. api/synchronous/api_core/animal_detection_api/md_visualization/visualization_utils.py +0 -841
  75. api/synchronous/api_core/tests/__init__.py +0 -0
  76. api/synchronous/api_core/tests/load_test.py +0 -110
  77. classification/__init__.py +0 -0
  78. classification/aggregate_classifier_probs.py +0 -108
  79. classification/analyze_failed_images.py +0 -227
  80. classification/cache_batchapi_outputs.py +0 -198
  81. classification/create_classification_dataset.py +0 -627
  82. classification/crop_detections.py +0 -516
  83. classification/csv_to_json.py +0 -226
  84. classification/detect_and_crop.py +0 -855
  85. classification/efficientnet/__init__.py +0 -9
  86. classification/efficientnet/model.py +0 -415
  87. classification/efficientnet/utils.py +0 -610
  88. classification/evaluate_model.py +0 -520
  89. classification/identify_mislabeled_candidates.py +0 -152
  90. classification/json_to_azcopy_list.py +0 -63
  91. classification/json_validator.py +0 -695
  92. classification/map_classification_categories.py +0 -276
  93. classification/merge_classification_detection_output.py +0 -506
  94. classification/prepare_classification_script.py +0 -194
  95. classification/prepare_classification_script_mc.py +0 -228
  96. classification/run_classifier.py +0 -286
  97. classification/save_mislabeled.py +0 -110
  98. classification/train_classifier.py +0 -825
  99. classification/train_classifier_tf.py +0 -724
  100. classification/train_utils.py +0 -322
  101. data_management/__init__.py +0 -0
  102. data_management/annotations/__init__.py +0 -0
  103. data_management/annotations/annotation_constants.py +0 -34
  104. data_management/camtrap_dp_to_coco.py +0 -238
  105. data_management/cct_json_utils.py +0 -395
  106. data_management/cct_to_md.py +0 -176
  107. data_management/cct_to_wi.py +0 -289
  108. data_management/coco_to_labelme.py +0 -272
  109. data_management/coco_to_yolo.py +0 -662
  110. data_management/databases/__init__.py +0 -0
  111. data_management/databases/add_width_and_height_to_db.py +0 -33
  112. data_management/databases/combine_coco_camera_traps_files.py +0 -206
  113. data_management/databases/integrity_check_json_db.py +0 -477
  114. data_management/databases/subset_json_db.py +0 -115
  115. data_management/generate_crops_from_cct.py +0 -149
  116. data_management/get_image_sizes.py +0 -188
  117. data_management/importers/add_nacti_sizes.py +0 -52
  118. data_management/importers/add_timestamps_to_icct.py +0 -79
  119. data_management/importers/animl_results_to_md_results.py +0 -158
  120. data_management/importers/auckland_doc_test_to_json.py +0 -372
  121. data_management/importers/auckland_doc_to_json.py +0 -200
  122. data_management/importers/awc_to_json.py +0 -189
  123. data_management/importers/bellevue_to_json.py +0 -273
  124. data_management/importers/cacophony-thermal-importer.py +0 -796
  125. data_management/importers/carrizo_shrubfree_2018.py +0 -268
  126. data_management/importers/carrizo_trail_cam_2017.py +0 -287
  127. data_management/importers/cct_field_adjustments.py +0 -57
  128. data_management/importers/channel_islands_to_cct.py +0 -913
  129. data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  130. data_management/importers/eMammal/eMammal_helpers.py +0 -249
  131. data_management/importers/eMammal/make_eMammal_json.py +0 -223
  132. data_management/importers/ena24_to_json.py +0 -275
  133. data_management/importers/filenames_to_json.py +0 -385
  134. data_management/importers/helena_to_cct.py +0 -282
  135. data_management/importers/idaho-camera-traps.py +0 -1407
  136. data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  137. data_management/importers/jb_csv_to_json.py +0 -150
  138. data_management/importers/mcgill_to_json.py +0 -250
  139. data_management/importers/missouri_to_json.py +0 -489
  140. data_management/importers/nacti_fieldname_adjustments.py +0 -79
  141. data_management/importers/noaa_seals_2019.py +0 -181
  142. data_management/importers/pc_to_json.py +0 -365
  143. data_management/importers/plot_wni_giraffes.py +0 -123
  144. data_management/importers/prepare-noaa-fish-data-for-lila.py +0 -359
  145. data_management/importers/prepare_zsl_imerit.py +0 -131
  146. data_management/importers/rspb_to_json.py +0 -356
  147. data_management/importers/save_the_elephants_survey_A.py +0 -320
  148. data_management/importers/save_the_elephants_survey_B.py +0 -332
  149. data_management/importers/snapshot_safari_importer.py +0 -758
  150. data_management/importers/snapshot_safari_importer_reprise.py +0 -665
  151. data_management/importers/snapshot_serengeti_lila.py +0 -1067
  152. data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  153. data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  154. data_management/importers/sulross_get_exif.py +0 -65
  155. data_management/importers/timelapse_csv_set_to_json.py +0 -490
  156. data_management/importers/ubc_to_json.py +0 -399
  157. data_management/importers/umn_to_json.py +0 -507
  158. data_management/importers/wellington_to_json.py +0 -263
  159. data_management/importers/wi_to_json.py +0 -441
  160. data_management/importers/zamba_results_to_md_results.py +0 -181
  161. data_management/labelme_to_coco.py +0 -548
  162. data_management/labelme_to_yolo.py +0 -272
  163. data_management/lila/__init__.py +0 -0
  164. data_management/lila/add_locations_to_island_camera_traps.py +0 -97
  165. data_management/lila/add_locations_to_nacti.py +0 -147
  166. data_management/lila/create_lila_blank_set.py +0 -557
  167. data_management/lila/create_lila_test_set.py +0 -151
  168. data_management/lila/create_links_to_md_results_files.py +0 -106
  169. data_management/lila/download_lila_subset.py +0 -177
  170. data_management/lila/generate_lila_per_image_labels.py +0 -515
  171. data_management/lila/get_lila_annotation_counts.py +0 -170
  172. data_management/lila/get_lila_image_counts.py +0 -111
  173. data_management/lila/lila_common.py +0 -300
  174. data_management/lila/test_lila_metadata_urls.py +0 -132
  175. data_management/ocr_tools.py +0 -874
  176. data_management/read_exif.py +0 -681
  177. data_management/remap_coco_categories.py +0 -84
  178. data_management/remove_exif.py +0 -66
  179. data_management/resize_coco_dataset.py +0 -189
  180. data_management/wi_download_csv_to_coco.py +0 -246
  181. data_management/yolo_output_to_md_output.py +0 -441
  182. data_management/yolo_to_coco.py +0 -676
  183. detection/__init__.py +0 -0
  184. detection/detector_training/__init__.py +0 -0
  185. detection/detector_training/model_main_tf2.py +0 -114
  186. detection/process_video.py +0 -703
  187. detection/pytorch_detector.py +0 -337
  188. detection/run_detector.py +0 -779
  189. detection/run_detector_batch.py +0 -1219
  190. detection/run_inference_with_yolov5_val.py +0 -917
  191. detection/run_tiled_inference.py +0 -935
  192. detection/tf_detector.py +0 -188
  193. detection/video_utils.py +0 -606
  194. docs/source/conf.py +0 -43
  195. md_utils/__init__.py +0 -0
  196. md_utils/azure_utils.py +0 -174
  197. md_utils/ct_utils.py +0 -612
  198. md_utils/directory_listing.py +0 -246
  199. md_utils/md_tests.py +0 -968
  200. md_utils/path_utils.py +0 -1044
  201. md_utils/process_utils.py +0 -157
  202. md_utils/sas_blob_utils.py +0 -509
  203. md_utils/split_locations_into_train_val.py +0 -228
  204. md_utils/string_utils.py +0 -92
  205. md_utils/url_utils.py +0 -323
  206. md_utils/write_html_image_list.py +0 -225
  207. md_visualization/__init__.py +0 -0
  208. md_visualization/plot_utils.py +0 -293
  209. md_visualization/render_images_with_thumbnails.py +0 -275
  210. md_visualization/visualization_utils.py +0 -1537
  211. md_visualization/visualize_db.py +0 -551
  212. md_visualization/visualize_detector_output.py +0 -406
  213. megadetector-5.0.9.dist-info/RECORD +0 -224
  214. megadetector-5.0.9.dist-info/top_level.txt +0 -8
  215. taxonomy_mapping/__init__.py +0 -0
  216. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +0 -491
  217. taxonomy_mapping/map_new_lila_datasets.py +0 -154
  218. taxonomy_mapping/prepare_lila_taxonomy_release.py +0 -142
  219. taxonomy_mapping/preview_lila_taxonomy.py +0 -591
  220. taxonomy_mapping/retrieve_sample_image.py +0 -71
  221. taxonomy_mapping/simple_image_download.py +0 -218
  222. taxonomy_mapping/species_lookup.py +0 -834
  223. taxonomy_mapping/taxonomy_csv_checker.py +0 -159
  224. taxonomy_mapping/taxonomy_graph.py +0 -346
  225. taxonomy_mapping/validate_lila_category_mappings.py +0 -83
  226. {megadetector-5.0.9.dist-info → megadetector-5.0.11.dist-info}/WHEEL +0 -0
@@ -1,159 +0,0 @@
1
- """
2
-
3
- taxonomy_csv_checker.py
4
-
5
- Checks the taxonomy CSV file to make sure that for each row:
6
-
7
- 1) The 'taxonomy_level' column matches the lowest-level taxon level in the
8
- 'taxonomy_string' column.
9
-
10
- 2) The 'scientific_name' column matches the scientific name from the
11
- lowest-level taxon level in the 'taxonomy_string' column.
12
-
13
- Prints out any mismatches.
14
-
15
- Also prints out nodes that have 2 ambiguous parents. See "CASE 2" from the
16
- module docstring of taxonomy_graph.py.
17
-
18
- """
19
-
20
- #%% Imports
21
-
22
- import sys
23
- import argparse
24
-
25
- import networkx as nx
26
- import pandas as pd
27
-
28
- from typing import Optional
29
-
30
- from taxonomy_mapping.taxonomy_graph import TaxonNode, dag_to_tree
31
-
32
-
33
- #%% Taxonomy checking
34
-
35
def check_taxonomy_csv(csv_path: str) -> None:
    """
    Checks a taxonomy CSV file; see module docstring for the specific checks.

    For each row, verifies that the 'taxonomy_level' and 'scientific_name'
    columns agree with the lowest-level taxon in the 'taxonomy_string' column,
    and prints any nodes that have two ambiguous parents.

    Args:
        csv_path: str, path to the taxonomy CSV file

    Returns: None; all results are printed to stdout
    """

    # literal_eval only accepts Python literal structures, unlike eval(),
    # which would execute arbitrary code embedded in the CSV.
    from ast import literal_eval

    taxonomy_df = pd.read_csv(csv_path)

    graph = nx.DiGraph()
    taxon_to_node = {}  # maps (taxon_level, taxon_name) to a TaxonNode

    num_taxon_level_errors = 0
    num_scientific_name_errors = 0

    for i_row, row in taxonomy_df.iterrows():

        ds = row['dataset_name']
        ds_label = row['query']
        scientific_name = row['scientific_name']
        level = row['taxonomy_level']

        # This used to represent the source of the mapping: iNat, gbif, or manual. We've
        # stopped tracking this, so this is now vestigial.
        id_source = 0  # row['source']

        taxa_ancestry = row['taxonomy_string']
        if pd.isna(taxa_ancestry):
            # taxonomy CSV rows without 'taxonomy_string' entries are excluded
            # from the taxonomy graph, but can be included in a classification
            # label specification JSON via the 'dataset_labels' key
            continue
        else:
            # Safe replacement for eval(): parses the serialized list of taxa
            taxa_ancestry = literal_eval(taxa_ancestry)

        taxon_child: Optional[TaxonNode] = None
        for j, taxon in enumerate(taxa_ancestry):
            taxon_id, taxon_level, taxon_name, _ = taxon

            key = (taxon_level, taxon_name)
            if key not in taxon_to_node:
                taxon_to_node[key] = TaxonNode(level=taxon_level,
                                               name=taxon_name, graph=graph)
            node = taxon_to_node[key]

            if taxon_child is not None:
                node.add_child(taxon_child)

            node.add_id(id_source, int(taxon_id))  # np.int64 -> int

            # The first entry in the ancestry list is the row's own taxon, so
            # it should match the row's 'taxonomy_level' / 'scientific_name'
            if j == 0:
                if level != taxon_level:
                    print(f'row: {i_row}, {ds}, {ds_label}')
                    print(f'- taxonomy_level column: {level}, '
                          f'level from taxonomy_string: {taxon_level}')
                    print()
                    num_taxon_level_errors += 1

                if scientific_name != taxon_name:
                    print(f'row: {i_row}, {ds}, {ds_label}')
                    print(f'- scientific_name column: {scientific_name}, '
                          f'name from taxonomy_string: {taxon_name}')
                    print()
                    num_scientific_name_errors += 1

            taxon_child = node

    # ...for each row in the taxonomy file

    assert nx.is_directed_acyclic_graph(graph)

    # Report nodes with two parents where neither parent is an ancestor of the
    # other; these require hard-coded resolution in dag_to_tree()
    for node in graph.nodes:
        assert len(node.parents) <= 2
        if len(node.parents) == 2:
            p0 = node.parents[0]
            p1 = node.parents[1]
            assert p0 is not p1

            p0_is_ancestor_of_p1 = p1 in nx.descendants(graph, p0)
            p1_is_ancestor_of_p0 = p0 in nx.descendants(graph, p1)
            if not p0_is_ancestor_of_p1 and not p1_is_ancestor_of_p0:
                print('Node with two ambiguous parents:', node)
                print('\t', p0)
                print('\t\t', p0.parents)
                print('\t', p1)
                print('\t\t', p1.parents)

    # dag_to_tree() raises AssertionError for any ambiguity it cannot resolve
    try:
        dag_to_tree(graph, taxon_to_node)
        print('All ambiguous parents have hard-coded resolution in '
              'dag_to_tree().')
    except AssertionError as e:
        print(f'At least one node has unresolved ambiguous parents: {e}')

    print('Processed {} rows from {}'.format(len(taxonomy_df),csv_path))

    print('num taxon level errors:', num_taxon_level_errors)
    print('num scientific name errors:', num_scientific_name_errors)
130
-
131
-
132
- #%% Command-line driver
133
-
134
if __name__ == '__main__':

    # Command-line entry point: a single positional argument naming the
    # taxonomy CSV to validate.  With no arguments, show usage and exit.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        'taxonomy_csv_path',
        help='path to taxonomy CSV file')

    if not sys.argv[1:]:
        arg_parser.print_help()
        arg_parser.exit()

    parsed_args = arg_parser.parse_args()
    check_taxonomy_csv(parsed_args.taxonomy_csv_path)
148
-
149
-
150
- #%% Interactive driver
151
-
152
if False:

    #%%

    # Convenience cell for running the checker interactively against the
    # released LILA taxonomy mapping; never executed when the module is run.
    from os.path import expanduser
    csv_path = expanduser('~/lila/lila-taxonomy-mapping_release.csv')
    check_taxonomy_csv(csv_path)
159
-
@@ -1,346 +0,0 @@
1
- """
2
-
3
- taxonomy_graph.py
4
-
5
- Methods for transforming taxonomy CSV into a graph structure backed by
6
- NetworkX.
7
-
8
- We treat each taxon in the taxonomy as a node in a graph, represented by the
9
- TaxonNode class. We use a NetworkX directed graph (nx.DiGraph) to keep track of
10
- the edges (parent-child relationships) between the nodes.
11
-
12
- In theory, the true biological taxonomy graph should be a tree, where every
13
- taxon node has exactly 1 parent. However, because we use both GBIF and INAT
14
- taxonomies, there are 2 situations where a taxon node ends up with two parents.
15
- Thus, the graph is actually a "directed acyclic graph" (DAG) instead of a tree.
16
-
17
- The two situations are explained in detail below. This module includes a
18
- function dag_to_tree() which converts a DAG to a tree by heuristically removing
19
- edges from the DAG so that each node only has 1 parent.
20
-
21
- CASE 1: INAT and GBIF have different granularity in their taxonomy levels
22
- ======
23
- An example is shown below. In dag_to_tree(), the lower parent is kept, while
24
- the higher-up parent is discarded. In this example, the "sciurini -> sciurus"
25
- edge would be kept, while "sciuridae -> sciurus" would be removed.
26
-
27
- "eastern gray squirrel" (inat) "squirrel" (gbif)
28
- ------------------------------ -----------------
29
- family: sciuridae
30
- / \
31
- subfamily: sciurinae | # skips subfamily
32
- | |
33
- tribe: sciurini | # skips tribe
34
- \ /
35
- genus: sciurus
36
-
37
-
38
- CASE 2: INAT and GBIF have different taxonomies
39
- ======
40
- An example is shown below. In dag_to_tree(), the resolution to these
41
- discrepancies are hard-coded.
42
-
43
- order: cathartiformes (inat) accipitriformes (gbif)
44
- \ /
45
- family: cathartidae
46
-
47
- """
48
-
49
- #%% Imports and constants
50
-
51
- # allow forward references in typing annotations
52
- from __future__ import annotations
53
-
54
- from typing import (ClassVar, Container, Dict, Iterable, List, Optional, Set,
55
- Tuple)
56
-
57
- import networkx as nx
58
- import pandas as pd
59
-
60
- default_source = 'inat'
61
-
62
-
63
- #%% Classes
64
-
65
class TaxonNode:
    """
    A node in a taxonomy graph (DAG), associated with a set of dataset labels.

    By default, we support multiple parents for each TaxonNode. See discussion
    in module docstring above.
    """

    # class variables

    # If True, add_parent() refuses to attach a second, different parent
    single_parent_only: ClassVar[bool] = False

    # instance variables
    level: str
    name: str
    ids: Set[Tuple[str, int]]
    graph: Optional[nx.DiGraph]
    dataset_labels: Set[Tuple[str, str]]

    def __init__(self, level: str, name: str,
                 graph: Optional[nx.DiGraph] = None):
        """
        Args:
            level: str, taxonomic level (e.g. 'family', 'genus')
            name: str, name of this taxon
            graph: optional nx.DiGraph holding parent/child edges; methods
                that traverse the hierarchy require this to be set
        """

        self.level = level
        self.name = name
        self.graph = graph
        self.ids = set()
        self.dataset_labels = set()

    def __repr__(self):
        id_str = ', '.join(f'{source}={id}' for source, id in self.ids)
        return f'TaxonNode({id_str}, level={self.level}, name={self.name})'

    @property  # read-only getter
    def parents(self) -> List[TaxonNode]:
        """List of this node's parents, read from the backing graph."""
        assert self.graph is not None
        return list(self.graph.predecessors(self))

    @parents.setter
    def parents(self, parents: Iterable[TaxonNode]) -> None:
        """Replaces all incoming edges with edges from <parents>."""
        assert self.graph is not None
        for p in self.parents:
            self.graph.remove_edge(p, self)
        for p in parents:
            self.graph.add_edge(p, self)

    @property  # read-only getter
    def children(self) -> List[TaxonNode]:
        """List of this node's children, read from the backing graph."""
        assert self.graph is not None
        return list(self.graph.successors(self))

    @children.setter
    def children(self, children: Iterable[TaxonNode]) -> None:
        """Replaces all outgoing edges with edges to <children>."""
        assert self.graph is not None
        for c in self.children:
            self.graph.remove_edge(self, c)
        for c in children:
            self.graph.add_edge(self, c)

    def add_id(self, source: str, taxon_id: int) -> None:
        """Records a (source, taxon_id) identifier for this taxon."""
        # assert source in ['gbif', 'inat', 'manual']
        self.ids.add((source, taxon_id))

    def add_parent(self, parent: TaxonNode) -> None:
        """
        Adds a TaxonNode to the list of parents of the current TaxonNode.
        Requires this TaxonNode to be associated with a Graph.

        Args:
            parent: TaxonNode, must be higher in the taxonomical hierarchy
        """

        assert self.graph is not None
        parents = self.parents
        if TaxonNode.single_parent_only and len(parents) > 0:
            assert len(parents) == 1
            assert parents[0] is parent, (
                f'self.parents: {parents}, new parent: {parent}')
            return
        if parent not in parents:
            self.graph.add_edge(parent, self)

    def add_child(self, child: TaxonNode) -> None:
        """
        Adds a TaxonNode to the list of children of the current TaxonNode.
        Requires this TaxonNode to be associated with a Graph.

        Args:
            child: TaxonNode, must be lower in the taxonomical hierarchy
        """

        assert self.graph is not None
        self.graph.add_edge(self, child)

    def add_dataset_label(self, ds: str, ds_label: str) -> None:
        """
        Args:
            ds: str, name of dataset
            ds_label: str, name of label used by that dataset
        """

        self.dataset_labels.add((ds, ds_label))

    def get_dataset_labels(self,
                           include_datasets: Optional[Container[str]] = None
                           ) -> Set[Tuple[str, str]]:
        """
        Returns a set of all (ds, ds_label) tuples that belong to this taxon
        node or its descendants.

        Args:
            include_datasets: list of str, names of datasets to include
                if None, then all datasets are included

        Returns: set of (ds, ds_label) tuples (a new set; callers may mutate
            the result without affecting this node)
        """

        # Copy here: previously this aliased self.dataset_labels when
        # include_datasets was None, so the in-place |= below permanently
        # merged descendants' labels into this node's own label set.
        result = set(self.dataset_labels)
        if include_datasets is not None:
            result = set(tup for tup in result if tup[0] in include_datasets)

        for child in self.children:
            result |= child.get_dataset_labels(include_datasets)
        return result

    @classmethod
    def lowest_common_ancestor(cls, nodes: Iterable[TaxonNode]
                               ) -> Optional[TaxonNode]:
        """
        Returns the lowest common ancestor (LCA) of a list or set of nodes.

        For each node in <nodes>, get the set of nodes on the path to the root.
        The LCA of <nodes> is certainly in the intersection of these sets.
        Iterate through the nodes in this set intersection, looking for a node
        such that none of its children is in this intersection. Given n nodes
        from a k-ary tree of height h, the algorithm runs in O((n + k)h).

        Returns: TaxonNode, the LCA if it exists, or None if no LCA exists
        """

        paths = []
        for node in nodes:
            # get path to root
            path = {node}
            remaining = node.parents.copy()  # make a shallow copy
            while len(remaining) > 0:
                x = remaining.pop()
                if x not in path:
                    path.add(x)
                    remaining += x.parents
            paths.append(path)
        intersect = set.intersection(*paths)

        # The LCA is the member of the intersection with no child in the
        # intersection (i.e., the lowest such node)
        for node in intersect:
            if intersect.isdisjoint(node.children):
                return node
        return None
220
-
221
-
222
- #%% Module functions
223
-
224
def build_taxonomy_graph(taxonomy_df: pd.DataFrame
                         ) -> Tuple[
                             nx.DiGraph,
                             Dict[Tuple[str, str], TaxonNode],
                             Dict[Tuple[str, str], TaxonNode]
                         ]:
    """
    Creates a mapping from (taxon_level, taxon_name) to TaxonNodes, used for
    gathering all dataset labels associated with a given taxon.

    Args:
        taxonomy_df: pd.DataFrame, the taxonomy CSV

    Returns:
        graph: nx.DiGraph
        taxon_to_node: dict, maps (taxon_level, taxon_name) to a TaxonNode,
            keys are all lowercase
        label_to_node: dict, maps (dataset_name, dataset_label) to the lowest
            TaxonNode node in the tree that contains the label,
            keys are all lowercase
    """

    # Safe replacement for eval() when parsing the serialized taxa lists;
    # literal_eval only accepts Python literal structures.
    from ast import literal_eval

    graph = nx.DiGraph()
    taxon_to_node = {}   # maps (taxon_level, taxon_name) to a TaxonNode
    label_to_node = {}   # maps (dataset_name, dataset_label) to a TaxonNode
    for _, row in taxonomy_df.iterrows():
        ds = row['dataset_name'].lower()
        ds_label = row['query'].lower()
        if 'source' in row:
            id_source = row['source']
        else:
            id_source = default_source
        taxa_ancestry = row['taxonomy_string']
        if pd.isna(taxa_ancestry):
            # taxonomy CSV rows without 'taxonomy_string' entries are excluded
            # from the taxonomy graph, but can be included in a classification
            # label specification JSON via the 'dataset_labels' key
            continue
        else:
            taxa_ancestry = literal_eval(taxa_ancestry)

        taxon_child: Optional[TaxonNode] = None
        for i, taxon in enumerate(taxa_ancestry):
            taxon_id, taxon_level, taxon_name, _ = taxon
            taxon_level = taxon_level.lower()
            taxon_name = taxon_name.lower()

            key = (taxon_level, taxon_name)
            if key not in taxon_to_node:
                taxon_to_node[key] = TaxonNode(level=taxon_level,
                                               name=taxon_name, graph=graph)
            node = taxon_to_node[key]

            if taxon_child is not None:
                node.add_child(taxon_child)

            node.add_id(id_source, int(taxon_id))  # np.int64 -> int

            # The first entry in the ancestry list is the row's own taxon and
            # must agree with the row's 'taxonomy_level'/'scientific_name'
            if i == 0:
                assert row['taxonomy_level'] == taxon_level, (
                    f'taxonomy CSV level: {row["taxonomy_level"]}, '
                    f'level from taxonomy_string: {taxon_level}')
                assert row['scientific_name'] == taxon_name
                node.add_dataset_label(ds, ds_label)
                label_to_node[(ds, ds_label)] = node

            taxon_child = node

    assert nx.is_directed_acyclic_graph(graph)
    return graph, taxon_to_node, label_to_node
293
-
294
-
295
def dag_to_tree(graph: nx.DiGraph,
                taxon_to_node: Dict[Tuple[str, str], TaxonNode]) -> nx.DiGraph:
    """
    Converts the taxonomy graph from a DAG to a tree by keeping exactly one
    parent per node.  See module docstring for more information.

    NOTE: nx.is_tree() on the output of this function might fail because the
    tree may have disconnected components. Instead, check nx.is_tree() on each
    component separately.

    Args:
        graph: nx.DiGraph, DAG representation of the taxonomy hierarchy
        taxon_to_node: dict, maps (taxon_level, taxon_name) to a TaxonNode

    Returns: nx.DiGraph, a tree-structured graph
    """

    # Hard-coded winners for nodes whose two parents are not related to each
    # other (CASE 2 in the module docstring): node name -> chosen parent key
    special_cases = {
        'cathartidae': ('order', 'accipitriformes'),
        'soricidae': ('order', 'eulipotyphla'),
        'nyctanassa violacea': ('genus', 'nyctanassa'),
        'trochilidae': ('order', 'caprimulgiformes'),  # this one is controversial
    }

    tree = nx.DiGraph()
    for node in graph.nodes:
        tree.add_node(node)
        node_parents = node.parents

        if len(node_parents) == 1:
            tree.add_edge(node_parents[0], node)

        elif len(node_parents) == 2:
            first, second = node_parents

            # Prefer whichever parent is lower in the hierarchy (CASE 1)
            if second in nx.descendants(graph, first):
                tree.add_edge(second, node)
            elif first in nx.descendants(graph, second):
                tree.add_edge(first, node)
            else:
                # Unrelated parents: fall back to the hard-coded resolution
                assert node.name in special_cases
                chosen = taxon_to_node[special_cases[node.name]]
                assert (chosen is first) or (chosen is second)
                tree.add_edge(chosen, node)

    # Point every node's backing graph at the new tree
    for node in tree.nodes:
        node.graph = tree
    return tree
@@ -1,83 +0,0 @@
1
- """
2
-
3
- validate_lila_category_mappings.py
4
-
5
- Confirm that all category names on LILA have mappings in the taxonomy file.
6
-
7
- """
8
-
9
- #%% Constants and imports
10
-
11
- import json
12
- import os
13
-
14
- from data_management.lila.lila_common import read_lila_taxonomy_mapping
15
-
16
-
17
- #%% Prevent execution during infrastructural imports
18
-
19
if False:

    #%% Constants

    # Root folder for local LILA artifacts
    lila_root = os.path.expanduser('~/lila')

    metadata_folder = os.path.join(lila_root,'metadata')
    os.makedirs(metadata_folder,exist_ok=True)

    # Created by get_lila_category_list.py... contains counts for each category
    category_list_folder = os.path.join(lila_root,'lila_categories_list')
    dataset_to_categories_file = os.path.join(category_list_folder,
                                              'lila_dataset_to_categories.json')

    assert os.path.isfile(dataset_to_categories_file)


    #%% Load category and taxonomy files

    with open(dataset_to_categories_file,'r') as f:
        dataset_to_categories = json.load(f)

    taxonomy_df = read_lila_taxonomy_mapping(metadata_folder)


    #%% Map dataset names and category names to scientific names

    query_to_scientific_name = {}

    unmapped_queries = set()

    for _, taxonomy_row in taxonomy_df.iterrows():

        # Keys are "dataset:query", lowercased
        query_key = (taxonomy_row['dataset_name'] + ':' +
                     taxonomy_row['query']).lower()

        scientific_name = taxonomy_row['scientific_name']
        if isinstance(scientific_name,str):
            query_to_scientific_name[query_key] = scientific_name
        else:
            # A non-string (NaN) scientific name means this query is unmapped
            unmapped_queries.add(query_key)
            query_to_scientific_name[query_key] = 'unmapped'


    #%% For each dataset, make sure we can map every category to the taxonomy

    for raw_dataset_name in dataset_to_categories.keys():

        # Bounding-box variants share category names with their base dataset
        if '_bbox' in raw_dataset_name:
            dataset_name = raw_dataset_name.replace('_bbox','')
        else:
            dataset_name = raw_dataset_name

        categories = dataset_to_categories[dataset_name]

        for category in categories:

            query_key = (dataset_name + ':' + category['name']).lower()

            if query_key in query_to_scientific_name:
                scientific_name = query_to_scientific_name[query_key]
            else:
                print('Could not find mapping for {}'.format(query_key))