megadetector 5.0.9__py3-none-any.whl → 5.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic; see the registry's advisory page for more details.

Files changed (226)
  1. {megadetector-5.0.9.dist-info → megadetector-5.0.11.dist-info}/LICENSE +0 -0
  2. {megadetector-5.0.9.dist-info → megadetector-5.0.11.dist-info}/METADATA +12 -11
  3. megadetector-5.0.11.dist-info/RECORD +5 -0
  4. megadetector-5.0.11.dist-info/top_level.txt +1 -0
  5. api/__init__.py +0 -0
  6. api/batch_processing/__init__.py +0 -0
  7. api/batch_processing/api_core/__init__.py +0 -0
  8. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  9. api/batch_processing/api_core/batch_service/score.py +0 -439
  10. api/batch_processing/api_core/server.py +0 -294
  11. api/batch_processing/api_core/server_api_config.py +0 -98
  12. api/batch_processing/api_core/server_app_config.py +0 -55
  13. api/batch_processing/api_core/server_batch_job_manager.py +0 -220
  14. api/batch_processing/api_core/server_job_status_table.py +0 -152
  15. api/batch_processing/api_core/server_orchestration.py +0 -360
  16. api/batch_processing/api_core/server_utils.py +0 -92
  17. api/batch_processing/api_core_support/__init__.py +0 -0
  18. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
  19. api/batch_processing/api_support/__init__.py +0 -0
  20. api/batch_processing/api_support/summarize_daily_activity.py +0 -152
  21. api/batch_processing/data_preparation/__init__.py +0 -0
  22. api/batch_processing/data_preparation/manage_local_batch.py +0 -2391
  23. api/batch_processing/data_preparation/manage_video_batch.py +0 -327
  24. api/batch_processing/integration/digiKam/setup.py +0 -6
  25. api/batch_processing/integration/digiKam/xmp_integration.py +0 -465
  26. api/batch_processing/integration/eMammal/test_scripts/config_template.py +0 -5
  27. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -126
  28. api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +0 -55
  29. api/batch_processing/postprocessing/__init__.py +0 -0
  30. api/batch_processing/postprocessing/add_max_conf.py +0 -64
  31. api/batch_processing/postprocessing/categorize_detections_by_size.py +0 -163
  32. api/batch_processing/postprocessing/combine_api_outputs.py +0 -249
  33. api/batch_processing/postprocessing/compare_batch_results.py +0 -958
  34. api/batch_processing/postprocessing/convert_output_format.py +0 -397
  35. api/batch_processing/postprocessing/load_api_results.py +0 -195
  36. api/batch_processing/postprocessing/md_to_coco.py +0 -310
  37. api/batch_processing/postprocessing/md_to_labelme.py +0 -330
  38. api/batch_processing/postprocessing/merge_detections.py +0 -401
  39. api/batch_processing/postprocessing/postprocess_batch_results.py +0 -1904
  40. api/batch_processing/postprocessing/remap_detection_categories.py +0 -170
  41. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +0 -661
  42. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +0 -211
  43. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +0 -82
  44. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +0 -1631
  45. api/batch_processing/postprocessing/separate_detections_into_folders.py +0 -731
  46. api/batch_processing/postprocessing/subset_json_detector_output.py +0 -696
  47. api/batch_processing/postprocessing/top_folders_to_bottom.py +0 -223
  48. api/synchronous/__init__.py +0 -0
  49. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  50. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -152
  51. api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -266
  52. api/synchronous/api_core/animal_detection_api/config.py +0 -35
  53. api/synchronous/api_core/animal_detection_api/data_management/annotations/annotation_constants.py +0 -47
  54. api/synchronous/api_core/animal_detection_api/detection/detector_training/copy_checkpoints.py +0 -43
  55. api/synchronous/api_core/animal_detection_api/detection/detector_training/model_main_tf2.py +0 -114
  56. api/synchronous/api_core/animal_detection_api/detection/process_video.py +0 -543
  57. api/synchronous/api_core/animal_detection_api/detection/pytorch_detector.py +0 -304
  58. api/synchronous/api_core/animal_detection_api/detection/run_detector.py +0 -627
  59. api/synchronous/api_core/animal_detection_api/detection/run_detector_batch.py +0 -1029
  60. api/synchronous/api_core/animal_detection_api/detection/run_inference_with_yolov5_val.py +0 -581
  61. api/synchronous/api_core/animal_detection_api/detection/run_tiled_inference.py +0 -754
  62. api/synchronous/api_core/animal_detection_api/detection/tf_detector.py +0 -165
  63. api/synchronous/api_core/animal_detection_api/detection/video_utils.py +0 -495
  64. api/synchronous/api_core/animal_detection_api/md_utils/azure_utils.py +0 -174
  65. api/synchronous/api_core/animal_detection_api/md_utils/ct_utils.py +0 -262
  66. api/synchronous/api_core/animal_detection_api/md_utils/directory_listing.py +0 -251
  67. api/synchronous/api_core/animal_detection_api/md_utils/matlab_porting_tools.py +0 -97
  68. api/synchronous/api_core/animal_detection_api/md_utils/path_utils.py +0 -416
  69. api/synchronous/api_core/animal_detection_api/md_utils/process_utils.py +0 -110
  70. api/synchronous/api_core/animal_detection_api/md_utils/sas_blob_utils.py +0 -509
  71. api/synchronous/api_core/animal_detection_api/md_utils/string_utils.py +0 -59
  72. api/synchronous/api_core/animal_detection_api/md_utils/url_utils.py +0 -144
  73. api/synchronous/api_core/animal_detection_api/md_utils/write_html_image_list.py +0 -226
  74. api/synchronous/api_core/animal_detection_api/md_visualization/visualization_utils.py +0 -841
  75. api/synchronous/api_core/tests/__init__.py +0 -0
  76. api/synchronous/api_core/tests/load_test.py +0 -110
  77. classification/__init__.py +0 -0
  78. classification/aggregate_classifier_probs.py +0 -108
  79. classification/analyze_failed_images.py +0 -227
  80. classification/cache_batchapi_outputs.py +0 -198
  81. classification/create_classification_dataset.py +0 -627
  82. classification/crop_detections.py +0 -516
  83. classification/csv_to_json.py +0 -226
  84. classification/detect_and_crop.py +0 -855
  85. classification/efficientnet/__init__.py +0 -9
  86. classification/efficientnet/model.py +0 -415
  87. classification/efficientnet/utils.py +0 -610
  88. classification/evaluate_model.py +0 -520
  89. classification/identify_mislabeled_candidates.py +0 -152
  90. classification/json_to_azcopy_list.py +0 -63
  91. classification/json_validator.py +0 -695
  92. classification/map_classification_categories.py +0 -276
  93. classification/merge_classification_detection_output.py +0 -506
  94. classification/prepare_classification_script.py +0 -194
  95. classification/prepare_classification_script_mc.py +0 -228
  96. classification/run_classifier.py +0 -286
  97. classification/save_mislabeled.py +0 -110
  98. classification/train_classifier.py +0 -825
  99. classification/train_classifier_tf.py +0 -724
  100. classification/train_utils.py +0 -322
  101. data_management/__init__.py +0 -0
  102. data_management/annotations/__init__.py +0 -0
  103. data_management/annotations/annotation_constants.py +0 -34
  104. data_management/camtrap_dp_to_coco.py +0 -238
  105. data_management/cct_json_utils.py +0 -395
  106. data_management/cct_to_md.py +0 -176
  107. data_management/cct_to_wi.py +0 -289
  108. data_management/coco_to_labelme.py +0 -272
  109. data_management/coco_to_yolo.py +0 -662
  110. data_management/databases/__init__.py +0 -0
  111. data_management/databases/add_width_and_height_to_db.py +0 -33
  112. data_management/databases/combine_coco_camera_traps_files.py +0 -206
  113. data_management/databases/integrity_check_json_db.py +0 -477
  114. data_management/databases/subset_json_db.py +0 -115
  115. data_management/generate_crops_from_cct.py +0 -149
  116. data_management/get_image_sizes.py +0 -188
  117. data_management/importers/add_nacti_sizes.py +0 -52
  118. data_management/importers/add_timestamps_to_icct.py +0 -79
  119. data_management/importers/animl_results_to_md_results.py +0 -158
  120. data_management/importers/auckland_doc_test_to_json.py +0 -372
  121. data_management/importers/auckland_doc_to_json.py +0 -200
  122. data_management/importers/awc_to_json.py +0 -189
  123. data_management/importers/bellevue_to_json.py +0 -273
  124. data_management/importers/cacophony-thermal-importer.py +0 -796
  125. data_management/importers/carrizo_shrubfree_2018.py +0 -268
  126. data_management/importers/carrizo_trail_cam_2017.py +0 -287
  127. data_management/importers/cct_field_adjustments.py +0 -57
  128. data_management/importers/channel_islands_to_cct.py +0 -913
  129. data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  130. data_management/importers/eMammal/eMammal_helpers.py +0 -249
  131. data_management/importers/eMammal/make_eMammal_json.py +0 -223
  132. data_management/importers/ena24_to_json.py +0 -275
  133. data_management/importers/filenames_to_json.py +0 -385
  134. data_management/importers/helena_to_cct.py +0 -282
  135. data_management/importers/idaho-camera-traps.py +0 -1407
  136. data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  137. data_management/importers/jb_csv_to_json.py +0 -150
  138. data_management/importers/mcgill_to_json.py +0 -250
  139. data_management/importers/missouri_to_json.py +0 -489
  140. data_management/importers/nacti_fieldname_adjustments.py +0 -79
  141. data_management/importers/noaa_seals_2019.py +0 -181
  142. data_management/importers/pc_to_json.py +0 -365
  143. data_management/importers/plot_wni_giraffes.py +0 -123
  144. data_management/importers/prepare-noaa-fish-data-for-lila.py +0 -359
  145. data_management/importers/prepare_zsl_imerit.py +0 -131
  146. data_management/importers/rspb_to_json.py +0 -356
  147. data_management/importers/save_the_elephants_survey_A.py +0 -320
  148. data_management/importers/save_the_elephants_survey_B.py +0 -332
  149. data_management/importers/snapshot_safari_importer.py +0 -758
  150. data_management/importers/snapshot_safari_importer_reprise.py +0 -665
  151. data_management/importers/snapshot_serengeti_lila.py +0 -1067
  152. data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  153. data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  154. data_management/importers/sulross_get_exif.py +0 -65
  155. data_management/importers/timelapse_csv_set_to_json.py +0 -490
  156. data_management/importers/ubc_to_json.py +0 -399
  157. data_management/importers/umn_to_json.py +0 -507
  158. data_management/importers/wellington_to_json.py +0 -263
  159. data_management/importers/wi_to_json.py +0 -441
  160. data_management/importers/zamba_results_to_md_results.py +0 -181
  161. data_management/labelme_to_coco.py +0 -548
  162. data_management/labelme_to_yolo.py +0 -272
  163. data_management/lila/__init__.py +0 -0
  164. data_management/lila/add_locations_to_island_camera_traps.py +0 -97
  165. data_management/lila/add_locations_to_nacti.py +0 -147
  166. data_management/lila/create_lila_blank_set.py +0 -557
  167. data_management/lila/create_lila_test_set.py +0 -151
  168. data_management/lila/create_links_to_md_results_files.py +0 -106
  169. data_management/lila/download_lila_subset.py +0 -177
  170. data_management/lila/generate_lila_per_image_labels.py +0 -515
  171. data_management/lila/get_lila_annotation_counts.py +0 -170
  172. data_management/lila/get_lila_image_counts.py +0 -111
  173. data_management/lila/lila_common.py +0 -300
  174. data_management/lila/test_lila_metadata_urls.py +0 -132
  175. data_management/ocr_tools.py +0 -874
  176. data_management/read_exif.py +0 -681
  177. data_management/remap_coco_categories.py +0 -84
  178. data_management/remove_exif.py +0 -66
  179. data_management/resize_coco_dataset.py +0 -189
  180. data_management/wi_download_csv_to_coco.py +0 -246
  181. data_management/yolo_output_to_md_output.py +0 -441
  182. data_management/yolo_to_coco.py +0 -676
  183. detection/__init__.py +0 -0
  184. detection/detector_training/__init__.py +0 -0
  185. detection/detector_training/model_main_tf2.py +0 -114
  186. detection/process_video.py +0 -703
  187. detection/pytorch_detector.py +0 -337
  188. detection/run_detector.py +0 -779
  189. detection/run_detector_batch.py +0 -1219
  190. detection/run_inference_with_yolov5_val.py +0 -917
  191. detection/run_tiled_inference.py +0 -935
  192. detection/tf_detector.py +0 -188
  193. detection/video_utils.py +0 -606
  194. docs/source/conf.py +0 -43
  195. md_utils/__init__.py +0 -0
  196. md_utils/azure_utils.py +0 -174
  197. md_utils/ct_utils.py +0 -612
  198. md_utils/directory_listing.py +0 -246
  199. md_utils/md_tests.py +0 -968
  200. md_utils/path_utils.py +0 -1044
  201. md_utils/process_utils.py +0 -157
  202. md_utils/sas_blob_utils.py +0 -509
  203. md_utils/split_locations_into_train_val.py +0 -228
  204. md_utils/string_utils.py +0 -92
  205. md_utils/url_utils.py +0 -323
  206. md_utils/write_html_image_list.py +0 -225
  207. md_visualization/__init__.py +0 -0
  208. md_visualization/plot_utils.py +0 -293
  209. md_visualization/render_images_with_thumbnails.py +0 -275
  210. md_visualization/visualization_utils.py +0 -1537
  211. md_visualization/visualize_db.py +0 -551
  212. md_visualization/visualize_detector_output.py +0 -406
  213. megadetector-5.0.9.dist-info/RECORD +0 -224
  214. megadetector-5.0.9.dist-info/top_level.txt +0 -8
  215. taxonomy_mapping/__init__.py +0 -0
  216. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +0 -491
  217. taxonomy_mapping/map_new_lila_datasets.py +0 -154
  218. taxonomy_mapping/prepare_lila_taxonomy_release.py +0 -142
  219. taxonomy_mapping/preview_lila_taxonomy.py +0 -591
  220. taxonomy_mapping/retrieve_sample_image.py +0 -71
  221. taxonomy_mapping/simple_image_download.py +0 -218
  222. taxonomy_mapping/species_lookup.py +0 -834
  223. taxonomy_mapping/taxonomy_csv_checker.py +0 -159
  224. taxonomy_mapping/taxonomy_graph.py +0 -346
  225. taxonomy_mapping/validate_lila_category_mappings.py +0 -83
  226. {megadetector-5.0.9.dist-info → megadetector-5.0.11.dist-info}/WHEEL +0 -0
@@ -1,159 +0,0 @@
1
- """
2
-
3
- taxonomy_csv_checker.py
4
-
5
- Checks the taxonomy CSV file to make sure that for each row:
6
-
7
- 1) The 'taxonomy_level' column matches the lowest-level taxon level in the
8
- 'taxonomy_string' column.
9
-
10
- 2) The 'scientific_name' column matches the scientific name from the
11
- lowest-level taxon level in the 'taxonomy_string' column.
12
-
13
- Prints out any mismatches.
14
-
15
- Also prints out nodes that have 2 ambiguous parents. See "CASE 2" from the
16
- module docstring of taxonomy_graph.py.
17
-
18
- """
19
-
20
- #%% Imports
21
-
22
- import sys
23
- import argparse
24
-
25
- import networkx as nx
26
- import pandas as pd
27
-
28
- from typing import Optional
29
-
30
- from taxonomy_mapping.taxonomy_graph import TaxonNode, dag_to_tree
31
-
32
-
33
- #%% Taxonomy checking
34
-
35
def check_taxonomy_csv(csv_path: str) -> None:
    """
    Checks a taxonomy CSV file; see module docstring for the specific checks.

    For each row, verifies that the 'taxonomy_level' and 'scientific_name'
    columns agree with the lowest-level taxon in the 'taxonomy_string' column,
    and prints any nodes that have two ambiguous parents.

    Args:
        csv_path: str, path to the taxonomy CSV file

    Returns: None; all results are printed to stdout
    """

    # literal_eval only accepts Python literal structures, unlike eval(),
    # which would execute arbitrary code embedded in the CSV.
    from ast import literal_eval

    taxonomy_df = pd.read_csv(csv_path)

    graph = nx.DiGraph()
    taxon_to_node = {}  # maps (taxon_level, taxon_name) to a TaxonNode

    num_taxon_level_errors = 0
    num_scientific_name_errors = 0

    for i_row, row in taxonomy_df.iterrows():

        ds = row['dataset_name']
        ds_label = row['query']
        scientific_name = row['scientific_name']
        level = row['taxonomy_level']

        # This used to represent the source of the mapping: iNat, gbif, or manual. We've
        # stopped tracking this, so this is now vestigial.
        id_source = 0  # row['source']

        taxa_ancestry = row['taxonomy_string']
        if pd.isna(taxa_ancestry):
            # taxonomy CSV rows without 'taxonomy_string' entries are excluded
            # from the taxonomy graph, but can be included in a classification
            # label specification JSON via the 'dataset_labels' key
            continue
        else:
            # Safe replacement for eval(): parses the serialized list of taxa
            taxa_ancestry = literal_eval(taxa_ancestry)

        taxon_child: Optional[TaxonNode] = None
        for j, taxon in enumerate(taxa_ancestry):
            taxon_id, taxon_level, taxon_name, _ = taxon

            key = (taxon_level, taxon_name)
            if key not in taxon_to_node:
                taxon_to_node[key] = TaxonNode(level=taxon_level,
                                               name=taxon_name, graph=graph)
            node = taxon_to_node[key]

            if taxon_child is not None:
                node.add_child(taxon_child)

            node.add_id(id_source, int(taxon_id))  # np.int64 -> int

            # The first entry in the ancestry list is the row's own taxon, so
            # it should match the row's 'taxonomy_level' / 'scientific_name'
            if j == 0:
                if level != taxon_level:
                    print(f'row: {i_row}, {ds}, {ds_label}')
                    print(f'- taxonomy_level column: {level}, '
                          f'level from taxonomy_string: {taxon_level}')
                    print()
                    num_taxon_level_errors += 1

                if scientific_name != taxon_name:
                    print(f'row: {i_row}, {ds}, {ds_label}')
                    print(f'- scientific_name column: {scientific_name}, '
                          f'name from taxonomy_string: {taxon_name}')
                    print()
                    num_scientific_name_errors += 1

            taxon_child = node

    # ...for each row in the taxonomy file

    assert nx.is_directed_acyclic_graph(graph)

    # Report nodes with two parents where neither parent is an ancestor of the
    # other; these require hard-coded resolution in dag_to_tree()
    for node in graph.nodes:
        assert len(node.parents) <= 2
        if len(node.parents) == 2:
            p0 = node.parents[0]
            p1 = node.parents[1]
            assert p0 is not p1

            p0_is_ancestor_of_p1 = p1 in nx.descendants(graph, p0)
            p1_is_ancestor_of_p0 = p0 in nx.descendants(graph, p1)
            if not p0_is_ancestor_of_p1 and not p1_is_ancestor_of_p0:
                print('Node with two ambiguous parents:', node)
                print('\t', p0)
                print('\t\t', p0.parents)
                print('\t', p1)
                print('\t\t', p1.parents)

    # dag_to_tree() raises AssertionError for any ambiguity it cannot resolve
    try:
        dag_to_tree(graph, taxon_to_node)
        print('All ambiguous parents have hard-coded resolution in '
              'dag_to_tree().')
    except AssertionError as e:
        print(f'At least one node has unresolved ambiguous parents: {e}')

    print('Processed {} rows from {}'.format(len(taxonomy_df),csv_path))

    print('num taxon level errors:', num_taxon_level_errors)
    print('num scientific name errors:', num_scientific_name_errors)
130
-
131
-
132
- #%% Command-line driver
133
-
134
if __name__ == '__main__':

    # Command-line entry point: a single positional argument naming the
    # taxonomy CSV to validate.  With no arguments, show usage and exit.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        'taxonomy_csv_path',
        help='path to taxonomy CSV file')

    if not sys.argv[1:]:
        arg_parser.print_help()
        arg_parser.exit()

    parsed_args = arg_parser.parse_args()
    check_taxonomy_csv(parsed_args.taxonomy_csv_path)
148
-
149
-
150
- #%% Interactive driver
151
-
152
if False:

    #%%

    # Convenience cell for running the checker interactively against the
    # released LILA taxonomy mapping; never executed when the module is run.
    from os.path import expanduser
    csv_path = expanduser('~/lila/lila-taxonomy-mapping_release.csv')
    check_taxonomy_csv(csv_path)
159
-
@@ -1,346 +0,0 @@
1
- """
2
-
3
- taxonomy_graph.py
4
-
5
- Methods for transforming taxonomy CSV into a graph structure backed by
6
- NetworkX.
7
-
8
- We treat each taxon in the taxonomy as a node in a graph, represented by the
9
- TaxonNode class. We use a NetworkX directed graph (nx.DiGraph) to keep track of
10
- the edges (parent-child relationships) between the nodes.
11
-
12
- In theory, the true biological taxonomy graph should be a tree, where every
13
- taxon node has exactly 1 parent. However, because we use both GBIF and INAT
14
- taxonomies, there are 2 situations where a taxon node ends up with two parents.
15
- Thus, the graph is actually a "directed acyclic graph" (DAG) instead of a tree.
16
-
17
- The two situations are explained in detail below. This module includes a
18
- function dag_to_tree() which converts a DAG to a tree by heuristically removing
19
- edges from the DAG so that each node only has 1 parent.
20
-
21
- CASE 1: INAT and GBIF have different granularity in their taxonomy levels
22
- ======
23
- An example is shown below. In dag_to_tree(), the lower parent is kept, while
24
- the higher-up parent is discarded. In this example, the "sciurini -> sciurus"
25
- edge would be kept, while "sciuridae -> sciurus" would be removed.
26
-
27
- "eastern gray squirrel" (inat) "squirrel" (gbif)
28
- ------------------------------ -----------------
29
- family: sciuridae
30
- / \
31
- subfamily: sciurinae | # skips subfamily
32
- | |
33
- tribe: sciurini | # skips tribe
34
- \ /
35
- genus: sciurus
36
-
37
-
38
- CASE 2: INAT and GBIF have different taxonomies
39
- ======
40
- An example is shown below. In dag_to_tree(), the resolution to these
41
- discrepancies are hard-coded.
42
-
43
- order: cathartiformes (inat) accipitriformes (gbif)
44
- \ /
45
- family: cathartidae
46
-
47
- """
48
-
49
- #%% Imports and constants
50
-
51
- # allow forward references in typing annotations
52
- from __future__ import annotations
53
-
54
- from typing import (ClassVar, Container, Dict, Iterable, List, Optional, Set,
55
- Tuple)
56
-
57
- import networkx as nx
58
- import pandas as pd
59
-
60
- default_source = 'inat'
61
-
62
-
63
- #%% Classes
64
-
65
class TaxonNode:
    """
    A node in a taxonomy graph (DAG), associated with a set of dataset labels.

    By default, we support multiple parents for each TaxonNode. See discussion
    in module docstring above.
    """

    # class variables

    # If True, add_parent() refuses to attach a second, different parent
    single_parent_only: ClassVar[bool] = False

    # instance variables
    level: str
    name: str
    ids: Set[Tuple[str, int]]
    graph: Optional[nx.DiGraph]
    dataset_labels: Set[Tuple[str, str]]

    def __init__(self, level: str, name: str,
                 graph: Optional[nx.DiGraph] = None):
        """
        Args:
            level: str, taxonomic level (e.g. 'family', 'genus')
            name: str, name of this taxon
            graph: optional nx.DiGraph holding parent/child edges; methods
                that traverse the hierarchy require this to be set
        """

        self.level = level
        self.name = name
        self.graph = graph
        self.ids = set()
        self.dataset_labels = set()

    def __repr__(self):
        id_str = ', '.join(f'{source}={id}' for source, id in self.ids)
        return f'TaxonNode({id_str}, level={self.level}, name={self.name})'

    @property  # read-only getter
    def parents(self) -> List[TaxonNode]:
        """List of this node's parents, read from the backing graph."""
        assert self.graph is not None
        return list(self.graph.predecessors(self))

    @parents.setter
    def parents(self, parents: Iterable[TaxonNode]) -> None:
        """Replaces all incoming edges with edges from <parents>."""
        assert self.graph is not None
        for p in self.parents:
            self.graph.remove_edge(p, self)
        for p in parents:
            self.graph.add_edge(p, self)

    @property  # read-only getter
    def children(self) -> List[TaxonNode]:
        """List of this node's children, read from the backing graph."""
        assert self.graph is not None
        return list(self.graph.successors(self))

    @children.setter
    def children(self, children: Iterable[TaxonNode]) -> None:
        """Replaces all outgoing edges with edges to <children>."""
        assert self.graph is not None
        for c in self.children:
            self.graph.remove_edge(self, c)
        for c in children:
            self.graph.add_edge(self, c)

    def add_id(self, source: str, taxon_id: int) -> None:
        """Records a (source, taxon_id) identifier for this taxon."""
        # assert source in ['gbif', 'inat', 'manual']
        self.ids.add((source, taxon_id))

    def add_parent(self, parent: TaxonNode) -> None:
        """
        Adds a TaxonNode to the list of parents of the current TaxonNode.
        Requires this TaxonNode to be associated with a Graph.

        Args:
            parent: TaxonNode, must be higher in the taxonomical hierarchy
        """

        assert self.graph is not None
        parents = self.parents
        if TaxonNode.single_parent_only and len(parents) > 0:
            assert len(parents) == 1
            assert parents[0] is parent, (
                f'self.parents: {parents}, new parent: {parent}')
            return
        if parent not in parents:
            self.graph.add_edge(parent, self)

    def add_child(self, child: TaxonNode) -> None:
        """
        Adds a TaxonNode to the list of children of the current TaxonNode.
        Requires this TaxonNode to be associated with a Graph.

        Args:
            child: TaxonNode, must be lower in the taxonomical hierarchy
        """

        assert self.graph is not None
        self.graph.add_edge(self, child)

    def add_dataset_label(self, ds: str, ds_label: str) -> None:
        """
        Args:
            ds: str, name of dataset
            ds_label: str, name of label used by that dataset
        """

        self.dataset_labels.add((ds, ds_label))

    def get_dataset_labels(self,
                           include_datasets: Optional[Container[str]] = None
                           ) -> Set[Tuple[str, str]]:
        """
        Returns a set of all (ds, ds_label) tuples that belong to this taxon
        node or its descendants.

        Args:
            include_datasets: list of str, names of datasets to include
                if None, then all datasets are included

        Returns: set of (ds, ds_label) tuples (a new set; callers may mutate
            the result without affecting this node)
        """

        # Copy here: previously this aliased self.dataset_labels when
        # include_datasets was None, so the in-place |= below permanently
        # merged descendants' labels into this node's own label set.
        result = set(self.dataset_labels)
        if include_datasets is not None:
            result = set(tup for tup in result if tup[0] in include_datasets)

        for child in self.children:
            result |= child.get_dataset_labels(include_datasets)
        return result

    @classmethod
    def lowest_common_ancestor(cls, nodes: Iterable[TaxonNode]
                               ) -> Optional[TaxonNode]:
        """
        Returns the lowest common ancestor (LCA) of a list or set of nodes.

        For each node in <nodes>, get the set of nodes on the path to the root.
        The LCA of <nodes> is certainly in the intersection of these sets.
        Iterate through the nodes in this set intersection, looking for a node
        such that none of its children is in this intersection. Given n nodes
        from a k-ary tree of height h, the algorithm runs in O((n + k)h).

        Returns: TaxonNode, the LCA if it exists, or None if no LCA exists
        """

        paths = []
        for node in nodes:
            # get path to root
            path = {node}
            remaining = node.parents.copy()  # make a shallow copy
            while len(remaining) > 0:
                x = remaining.pop()
                if x not in path:
                    path.add(x)
                    remaining += x.parents
            paths.append(path)
        intersect = set.intersection(*paths)

        # The LCA is the member of the intersection with no child in the
        # intersection (i.e., the lowest such node)
        for node in intersect:
            if intersect.isdisjoint(node.children):
                return node
        return None
220
-
221
-
222
- #%% Module functions
223
-
224
def build_taxonomy_graph(taxonomy_df: pd.DataFrame
                         ) -> Tuple[
                             nx.DiGraph,
                             Dict[Tuple[str, str], TaxonNode],
                             Dict[Tuple[str, str], TaxonNode]
                         ]:
    """
    Creates a mapping from (taxon_level, taxon_name) to TaxonNodes, used for
    gathering all dataset labels associated with a given taxon.

    Args:
        taxonomy_df: pd.DataFrame, the taxonomy CSV

    Returns:
        graph: nx.DiGraph
        taxon_to_node: dict, maps (taxon_level, taxon_name) to a TaxonNode,
            keys are all lowercase
        label_to_node: dict, maps (dataset_name, dataset_label) to the lowest
            TaxonNode node in the tree that contains the label,
            keys are all lowercase
    """

    # Safe replacement for eval() when parsing the serialized taxa lists;
    # literal_eval only accepts Python literal structures.
    from ast import literal_eval

    graph = nx.DiGraph()
    taxon_to_node = {}   # maps (taxon_level, taxon_name) to a TaxonNode
    label_to_node = {}   # maps (dataset_name, dataset_label) to a TaxonNode
    for _, row in taxonomy_df.iterrows():
        ds = row['dataset_name'].lower()
        ds_label = row['query'].lower()
        if 'source' in row:
            id_source = row['source']
        else:
            id_source = default_source
        taxa_ancestry = row['taxonomy_string']
        if pd.isna(taxa_ancestry):
            # taxonomy CSV rows without 'taxonomy_string' entries are excluded
            # from the taxonomy graph, but can be included in a classification
            # label specification JSON via the 'dataset_labels' key
            continue
        else:
            taxa_ancestry = literal_eval(taxa_ancestry)

        taxon_child: Optional[TaxonNode] = None
        for i, taxon in enumerate(taxa_ancestry):
            taxon_id, taxon_level, taxon_name, _ = taxon
            taxon_level = taxon_level.lower()
            taxon_name = taxon_name.lower()

            key = (taxon_level, taxon_name)
            if key not in taxon_to_node:
                taxon_to_node[key] = TaxonNode(level=taxon_level,
                                               name=taxon_name, graph=graph)
            node = taxon_to_node[key]

            if taxon_child is not None:
                node.add_child(taxon_child)

            node.add_id(id_source, int(taxon_id))  # np.int64 -> int

            # The first entry in the ancestry list is the row's own taxon and
            # must agree with the row's 'taxonomy_level'/'scientific_name'
            if i == 0:
                assert row['taxonomy_level'] == taxon_level, (
                    f'taxonomy CSV level: {row["taxonomy_level"]}, '
                    f'level from taxonomy_string: {taxon_level}')
                assert row['scientific_name'] == taxon_name
                node.add_dataset_label(ds, ds_label)
                label_to_node[(ds, ds_label)] = node

            taxon_child = node

    assert nx.is_directed_acyclic_graph(graph)
    return graph, taxon_to_node, label_to_node
293
-
294
-
295
def dag_to_tree(graph: nx.DiGraph,
                taxon_to_node: Dict[Tuple[str, str], TaxonNode]) -> nx.DiGraph:
    """
    Converts the taxonomy graph from a DAG to a tree by keeping exactly one
    parent per node.  See module docstring for more information.

    NOTE: nx.is_tree() on the output of this function might fail because the
    tree may have disconnected components. Instead, check nx.is_tree() on each
    component separately.

    Args:
        graph: nx.DiGraph, DAG representation of the taxonomy hierarchy
        taxon_to_node: dict, maps (taxon_level, taxon_name) to a TaxonNode

    Returns: nx.DiGraph, a tree-structured graph
    """

    # Hard-coded winners for nodes whose two parents are not related to each
    # other (CASE 2 in the module docstring): node name -> chosen parent key
    special_cases = {
        'cathartidae': ('order', 'accipitriformes'),
        'soricidae': ('order', 'eulipotyphla'),
        'nyctanassa violacea': ('genus', 'nyctanassa'),
        'trochilidae': ('order', 'caprimulgiformes'),  # this one is controversial
    }

    tree = nx.DiGraph()
    for node in graph.nodes:
        tree.add_node(node)
        node_parents = node.parents

        if len(node_parents) == 1:
            tree.add_edge(node_parents[0], node)

        elif len(node_parents) == 2:
            first, second = node_parents

            # Prefer whichever parent is lower in the hierarchy (CASE 1)
            if second in nx.descendants(graph, first):
                tree.add_edge(second, node)
            elif first in nx.descendants(graph, second):
                tree.add_edge(first, node)
            else:
                # Unrelated parents: fall back to the hard-coded resolution
                assert node.name in special_cases
                chosen = taxon_to_node[special_cases[node.name]]
                assert (chosen is first) or (chosen is second)
                tree.add_edge(chosen, node)

    # Point every node's backing graph at the new tree
    for node in tree.nodes:
        node.graph = tree
    return tree
@@ -1,83 +0,0 @@
1
- """
2
-
3
- validate_lila_category_mappings.py
4
-
5
- Confirm that all category names on LILA have mappings in the taxonomy file.
6
-
7
- """
8
-
9
- #%% Constants and imports
10
-
11
- import json
12
- import os
13
-
14
- from data_management.lila.lila_common import read_lila_taxonomy_mapping
15
-
16
-
17
- #%% Prevent execution during infrastructural imports
18
-
19
if False:

    #%% Constants

    # Root folder for local LILA artifacts
    lila_root = os.path.expanduser('~/lila')

    metadata_folder = os.path.join(lila_root,'metadata')
    os.makedirs(metadata_folder,exist_ok=True)

    # Created by get_lila_category_list.py... contains counts for each category
    category_list_folder = os.path.join(lila_root,'lila_categories_list')
    dataset_to_categories_file = os.path.join(category_list_folder,
                                              'lila_dataset_to_categories.json')

    assert os.path.isfile(dataset_to_categories_file)


    #%% Load category and taxonomy files

    with open(dataset_to_categories_file,'r') as f:
        dataset_to_categories = json.load(f)

    taxonomy_df = read_lila_taxonomy_mapping(metadata_folder)


    #%% Map dataset names and category names to scientific names

    query_to_scientific_name = {}

    unmapped_queries = set()

    for _, taxonomy_row in taxonomy_df.iterrows():

        # Keys are "dataset:query", lowercased
        query_key = (taxonomy_row['dataset_name'] + ':' +
                     taxonomy_row['query']).lower()

        scientific_name = taxonomy_row['scientific_name']
        if isinstance(scientific_name,str):
            query_to_scientific_name[query_key] = scientific_name
        else:
            # A non-string (NaN) scientific name means this query is unmapped
            unmapped_queries.add(query_key)
            query_to_scientific_name[query_key] = 'unmapped'


    #%% For each dataset, make sure we can map every category to the taxonomy

    for raw_dataset_name in dataset_to_categories.keys():

        # Bounding-box variants share category names with their base dataset
        if '_bbox' in raw_dataset_name:
            dataset_name = raw_dataset_name.replace('_bbox','')
        else:
            dataset_name = raw_dataset_name

        categories = dataset_to_categories[dataset_name]

        for category in categories:

            query_key = (dataset_name + ':' + category['name']).lower()

            if query_key in query_to_scientific_name:
                scientific_name = query_to_scientific_name[query_key]
            else:
                print('Could not find mapping for {}'.format(query_key))