PyPI - megadetector - Versions diffs - 5.0.8__py3-none-any.whl → 5.0.9__py3-none-any.whl - Mend

megadetector-5.0.8-py3-none-any.whl → megadetector-5.0.9-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of megadetector might be problematic. Click here for more details.

Files changed (190)

api/__init__.py +0 -0
api/batch_processing/__init__.py +0 -0
api/batch_processing/api_core/__init__.py +0 -0
api/batch_processing/api_core/batch_service/__init__.py +0 -0
api/batch_processing/api_core/batch_service/score.py +0 -1
api/batch_processing/api_core/server_job_status_table.py +0 -1
api/batch_processing/api_core_support/__init__.py +0 -0
api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
api/batch_processing/api_support/__init__.py +0 -0
api/batch_processing/api_support/summarize_daily_activity.py +0 -1
api/batch_processing/data_preparation/__init__.py +0 -0
api/batch_processing/data_preparation/manage_local_batch.py +65 -65
api/batch_processing/data_preparation/manage_video_batch.py +8 -8
api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
api/batch_processing/postprocessing/__init__.py +0 -0
api/batch_processing/postprocessing/add_max_conf.py +12 -12
api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
api/batch_processing/postprocessing/combine_api_outputs.py +68 -54
api/batch_processing/postprocessing/compare_batch_results.py +113 -43
api/batch_processing/postprocessing/convert_output_format.py +41 -16
api/batch_processing/postprocessing/load_api_results.py +16 -17
api/batch_processing/postprocessing/md_to_coco.py +31 -21
api/batch_processing/postprocessing/md_to_labelme.py +52 -22
api/batch_processing/postprocessing/merge_detections.py +14 -14
api/batch_processing/postprocessing/postprocess_batch_results.py +246 -174
api/batch_processing/postprocessing/remap_detection_categories.py +32 -25
api/batch_processing/postprocessing/render_detection_confusion_matrix.py +60 -27
api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +242 -158
api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
api/synchronous/__init__.py +0 -0
api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
api/synchronous/api_core/animal_detection_api/config.py +35 -35
api/synchronous/api_core/tests/__init__.py +0 -0
api/synchronous/api_core/tests/load_test.py +109 -109
classification/__init__.py +0 -0
classification/aggregate_classifier_probs.py +21 -24
classification/analyze_failed_images.py +11 -13
classification/cache_batchapi_outputs.py +51 -51
classification/create_classification_dataset.py +69 -68
classification/crop_detections.py +54 -53
classification/csv_to_json.py +97 -100
classification/detect_and_crop.py +105 -105
classification/evaluate_model.py +43 -42
classification/identify_mislabeled_candidates.py +47 -46
classification/json_to_azcopy_list.py +10 -10
classification/json_validator.py +72 -71
classification/map_classification_categories.py +44 -43
classification/merge_classification_detection_output.py +68 -68
classification/prepare_classification_script.py +157 -154
classification/prepare_classification_script_mc.py +228 -228
classification/run_classifier.py +27 -26
classification/save_mislabeled.py +30 -30
classification/train_classifier.py +20 -20
classification/train_classifier_tf.py +21 -22
classification/train_utils.py +10 -10
data_management/__init__.py +0 -0
data_management/annotations/__init__.py +0 -0
data_management/annotations/annotation_constants.py +18 -31
data_management/camtrap_dp_to_coco.py +238 -0
data_management/cct_json_utils.py +102 -59
data_management/cct_to_md.py +176 -158
data_management/cct_to_wi.py +247 -219
data_management/coco_to_labelme.py +272 -263
data_management/coco_to_yolo.py +79 -58
data_management/databases/__init__.py +0 -0
data_management/databases/add_width_and_height_to_db.py +20 -16
data_management/databases/combine_coco_camera_traps_files.py +35 -31
data_management/databases/integrity_check_json_db.py +62 -24
data_management/databases/subset_json_db.py +24 -15
data_management/generate_crops_from_cct.py +27 -45
data_management/get_image_sizes.py +188 -162
data_management/importers/add_nacti_sizes.py +8 -8
data_management/importers/add_timestamps_to_icct.py +78 -78
data_management/importers/animl_results_to_md_results.py +158 -158
data_management/importers/auckland_doc_test_to_json.py +9 -9
data_management/importers/auckland_doc_to_json.py +8 -8
data_management/importers/awc_to_json.py +7 -7
data_management/importers/bellevue_to_json.py +15 -15
data_management/importers/cacophony-thermal-importer.py +13 -13
data_management/importers/carrizo_shrubfree_2018.py +8 -8
data_management/importers/carrizo_trail_cam_2017.py +8 -8
data_management/importers/cct_field_adjustments.py +9 -9
data_management/importers/channel_islands_to_cct.py +10 -10
data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
data_management/importers/ena24_to_json.py +7 -7
data_management/importers/filenames_to_json.py +8 -8
data_management/importers/helena_to_cct.py +7 -7
data_management/importers/idaho-camera-traps.py +7 -7
data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
data_management/importers/jb_csv_to_json.py +9 -9
data_management/importers/mcgill_to_json.py +8 -8
data_management/importers/missouri_to_json.py +18 -18
data_management/importers/nacti_fieldname_adjustments.py +10 -10
data_management/importers/noaa_seals_2019.py +7 -7
data_management/importers/pc_to_json.py +7 -7
data_management/importers/plot_wni_giraffes.py +7 -7
data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
data_management/importers/prepare_zsl_imerit.py +7 -7
data_management/importers/rspb_to_json.py +8 -8
data_management/importers/save_the_elephants_survey_A.py +8 -8
data_management/importers/save_the_elephants_survey_B.py +9 -9
data_management/importers/snapshot_safari_importer.py +26 -26
data_management/importers/snapshot_safari_importer_reprise.py +665 -665
data_management/importers/snapshot_serengeti_lila.py +14 -14
data_management/importers/sulross_get_exif.py +8 -9
data_management/importers/timelapse_csv_set_to_json.py +11 -11
data_management/importers/ubc_to_json.py +13 -13
data_management/importers/umn_to_json.py +7 -7
data_management/importers/wellington_to_json.py +8 -8
data_management/importers/wi_to_json.py +9 -9
data_management/importers/zamba_results_to_md_results.py +181 -181
data_management/labelme_to_coco.py +65 -24
data_management/labelme_to_yolo.py +8 -8
data_management/lila/__init__.py +0 -0
data_management/lila/add_locations_to_island_camera_traps.py +9 -9
data_management/lila/add_locations_to_nacti.py +147 -147
data_management/lila/create_lila_blank_set.py +13 -13
data_management/lila/create_lila_test_set.py +8 -8
data_management/lila/create_links_to_md_results_files.py +106 -106
data_management/lila/download_lila_subset.py +44 -110
data_management/lila/generate_lila_per_image_labels.py +55 -42
data_management/lila/get_lila_annotation_counts.py +18 -15
data_management/lila/get_lila_image_counts.py +11 -11
data_management/lila/lila_common.py +96 -33
data_management/lila/test_lila_metadata_urls.py +132 -116
data_management/ocr_tools.py +173 -128
data_management/read_exif.py +110 -97
data_management/remap_coco_categories.py +83 -83
data_management/remove_exif.py +58 -62
data_management/resize_coco_dataset.py +30 -23
data_management/wi_download_csv_to_coco.py +246 -239
data_management/yolo_output_to_md_output.py +86 -73
data_management/yolo_to_coco.py +300 -60
detection/__init__.py +0 -0
detection/detector_training/__init__.py +0 -0
detection/process_video.py +85 -33
detection/pytorch_detector.py +43 -25
detection/run_detector.py +157 -72
detection/run_detector_batch.py +179 -113
detection/run_inference_with_yolov5_val.py +108 -48
detection/run_tiled_inference.py +111 -40
detection/tf_detector.py +51 -29
detection/video_utils.py +606 -521
docs/source/conf.py +43 -0
md_utils/__init__.py +0 -0
md_utils/azure_utils.py +9 -9
md_utils/ct_utils.py +228 -68
md_utils/directory_listing.py +59 -64
md_utils/md_tests.py +968 -871
md_utils/path_utils.py +460 -134
md_utils/process_utils.py +157 -133
md_utils/sas_blob_utils.py +20 -20
md_utils/split_locations_into_train_val.py +45 -32
md_utils/string_utils.py +33 -10
md_utils/url_utils.py +176 -60
md_utils/write_html_image_list.py +40 -33
md_visualization/__init__.py +0 -0
md_visualization/plot_utils.py +102 -109
md_visualization/render_images_with_thumbnails.py +34 -34
md_visualization/visualization_utils.py +597 -291
md_visualization/visualize_db.py +76 -48
md_visualization/visualize_detector_output.py +61 -42
{megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/METADATA +13 -7
megadetector-5.0.9.dist-info/RECORD +224 -0
{megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
taxonomy_mapping/__init__.py +0 -0
taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
taxonomy_mapping/map_new_lila_datasets.py +154 -154
taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
taxonomy_mapping/preview_lila_taxonomy.py +591 -591
taxonomy_mapping/retrieve_sample_image.py +12 -12
taxonomy_mapping/simple_image_download.py +11 -11
taxonomy_mapping/species_lookup.py +10 -10
taxonomy_mapping/taxonomy_csv_checker.py +18 -18
taxonomy_mapping/taxonomy_graph.py +47 -47
taxonomy_mapping/validate_lila_category_mappings.py +83 -76
data_management/cct_json_to_filename_json.py +0 -89
data_management/cct_to_csv.py +0 -140
data_management/databases/remove_corrupted_images_from_db.py +0 -191
detection/detector_training/copy_checkpoints.py +0 -43
megadetector-5.0.8.dist-info/RECORD +0 -205
{megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0
{megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/WHEEL +0 -0

taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py CHANGED

@@ -1,11 +1,11 @@
-########
-#
-# map_lila_taxonomy_to_wi_taxonomy.py
-#
-# Loads the LILA category mapping (in which taxonomy information comes from an
-# iNat taxonomy snapshot) and tries to map each class to the Wildlife Insights taxonomy.
-#
-########
+"""
+map_lila_taxonomy_to_wi_taxonomy.py
+Loads the LILA category mapping (in which taxonomy information comes from an
+iNat taxonomy snapshot) and tries to map each class to the Wildlife Insights taxonomy.
+"""
 #%% Constants and imports
@@ -18,467 +18,474 @@ from tqdm import tqdm
 from data_management.lila.lila_common import read_lila_taxonomy_mapping, \
     read_wildlife_insights_taxonomy_mapping
-lila_local_base = os.path.expanduser('~/lila')
-metadata_dir = os.path.join(lila_local_base, 'metadata')
-os.makedirs(metadata_dir, exist_ok=True)
+#%% Prevent execution during infrastructural imports
-# Created by get_lila_category_list.py... contains counts for each category
-category_list_dir = os.path.join(lila_local_base, 'lila_categories_list')
-lila_dataset_to_categories_file = os.path.join(
-    category_list_dir, 'lila_dataset_to_categories.json')
+if False:
+    #%%
+    lila_local_base = os.path.expanduser('~/lila')
-# This is a manually-curated file used to store mappings that had to be made manually
-lila_to_wi_supplementary_mapping_file = os.path.expanduser(
-    '~/git/MegaDetector/taxonomy_mapping/lila_to_wi_supplementary_mapping_file.csv')
+    metadata_dir = os.path.join(lila_local_base, 'metadata')
+    os.makedirs(metadata_dir, exist_ok=True)
-assert os.path.isfile(lila_dataset_to_categories_file)
+    # Created by get_lila_category_list.py... contains counts for each category
+    category_list_dir = os.path.join(lila_local_base, 'lila_categories_list')
+    lila_dataset_to_categories_file = os.path.join(
+        category_list_dir, 'lila_dataset_to_categories.json')
-# This is the main output file from this whole process
-wi_mapping_table_file = os.path.join(lila_local_base,'lila_wi_mapping_table.csv')
+    # This is a manually-curated file used to store mappings that had to be made manually
+    lila_to_wi_supplementary_mapping_file = os.path.expanduser(
+        '~/git/MegaDetector/taxonomy_mapping/lila_to_wi_supplementary_mapping_file.csv')
-id_column = 'uniqueIdentifier' # 'id'
+    assert os.path.isfile(lila_dataset_to_categories_file)
+    # This is the main output file from this whole process
+    wi_mapping_table_file = os.path.join(lila_local_base,'lila_wi_mapping_table.csv')
-#%% Load category and taxonomy files
+    id_column = 'uniqueIdentifier' # 'id'
-with open(lila_dataset_to_categories_file, 'r') as f:
-    lila_dataset_to_categories = json.load(f)
-lila_taxonomy_df = read_lila_taxonomy_mapping(metadata_dir)
+    #%% Load category and taxonomy files
-wi_taxonomy_df = read_wildlife_insights_taxonomy_mapping(metadata_dir)
+    with open(lila_dataset_to_categories_file, 'r') as f:
+        lila_dataset_to_categories = json.load(f)
+    lila_taxonomy_df = read_lila_taxonomy_mapping(metadata_dir)
-#%% Pull everything out of pandas
+    wi_taxonomy_df = read_wildlife_insights_taxonomy_mapping(metadata_dir)
-lila_taxonomy = lila_taxonomy_df.to_dict('records')
-wi_taxonomy = wi_taxonomy_df.to_dict('records')
+    #%% Pull everything out of pandas
-#%% Cache WI taxonomy lookups
+    lila_taxonomy = lila_taxonomy_df.to_dict('records')
+    wi_taxonomy = wi_taxonomy_df.to_dict('records')
-def is_empty_wi_item(v):
-    if isinstance(v, str):
-        return len(v) == 0
-    elif v is None:
-        return True
-    else:
-        assert isinstance(v, float) and np.isnan(v), 'Invalid item: {}'.format(str(v))
-        return True
+    #%% Cache WI taxonomy lookups
-def taxonomy_items_equal(a, b):
-    if isinstance(a, str) and (not isinstance(b, str)):
-        return False
-    if isinstance(b, str) and (not isinstance(a, str)):
-        return False
-    if (not isinstance(a, str)) or (not isinstance(b, str)):
-        assert (a is None and b is None) or (isinstance(a, float) and isinstance(b, float))
-        return True
-    return a == b
+    def is_empty_wi_item(v):
+        if isinstance(v, str):
+            return len(v) == 0
+        elif v is None:
+            return True
+        else:
+            assert isinstance(v, float) and np.isnan(v), 'Invalid item: {}'.format(str(v))
+            return True
-for taxon in wi_taxonomy:
-    taxon['taxon_name'] = None
+    def taxonomy_items_equal(a, b):
+        if isinstance(a, str) and (not isinstance(b, str)):
+            return False
+        if isinstance(b, str) and (not isinstance(a, str)):
+            return False
+        if (not isinstance(a, str)) or (not isinstance(b, str)):
+            assert (a is None and b is None) or (isinstance(a, float) and isinstance(b, float))
+            return True
+        return a == b
-from collections import defaultdict
-wi_taxon_name_to_taxa = defaultdict(list)
-# This is just a handy lookup table that we'll use to debug mismatches
-wi_common_name_to_taxon = {}
+    for taxon in wi_taxonomy:
+        taxon['taxon_name'] = None
-blank_taxon_name = 'blank'
-blank_taxon = None
+    from collections import defaultdict
+    wi_taxon_name_to_taxa = defaultdict(list)
-animal_taxon_name = 'animal'
-animal_taxon = None
+    # This is just a handy lookup table that we'll use to debug mismatches
+    wi_common_name_to_taxon = {}
-unknown_taxon_name = 'unknown'
-unknown_taxon = None
+    blank_taxon_name = 'blank'
+    blank_taxon = None
-ignore_taxa = set(['No CV Result', 'CV Needed', 'CV Failed'])
+    animal_taxon_name = 'animal'
+    animal_taxon = None
-known_problematic_taxon_ids = ['f94e6d97-59cf-4d38-a05a-a75efdd2863b']
+    unknown_taxon_name = 'unknown'
+    unknown_taxon = None
-human_taxa = []
+    ignore_taxa = set(['No CV Result', 'CV Needed', 'CV Failed'])
-# taxon = wi_taxonomy[21653]; print(taxon)
-for taxon in tqdm(wi_taxonomy):
+    known_problematic_taxon_ids = ['f94e6d97-59cf-4d38-a05a-a75efdd2863b']
-    taxon_name = None
+    human_taxa = []
-    assert taxon['taxonomyType'] == 'object' or taxon['taxonomyType'] == 'biological'
+    # taxon = wi_taxonomy[21653]; print(taxon)
+    for taxon in tqdm(wi_taxonomy):
-    for k in taxon.keys():
-        v = taxon[k]
-        if isinstance(v,str):
-            taxon[k] = v.strip()
-    if taxon['commonNameEnglish'] in ignore_taxa:
-        continue
+        taxon_name = None
-    if isinstance(taxon['commonNameEnglish'], str):
+        assert taxon['taxonomyType'] == 'object' or taxon['taxonomyType'] == 'biological'
-        wi_common_name_to_taxon[taxon['commonNameEnglish'].strip(
-        ).lower()] = taxon
+        for k in taxon.keys():
+            v = taxon[k]
+            if isinstance(v,str):
+                taxon[k] = v.strip()
+        if taxon['commonNameEnglish'] in ignore_taxa:
+            continue
-        special_taxon = False
+        if isinstance(taxon['commonNameEnglish'], str):
-        # Look for keywords that don't refer to specific taxa: blank/animal/unknown
-        if taxon['commonNameEnglish'].strip().lower() == blank_taxon_name:
-            blank_taxon = taxon
-            special_taxon = True
+            wi_common_name_to_taxon[taxon['commonNameEnglish'].strip(
+            ).lower()] = taxon
-        elif taxon['commonNameEnglish'].strip().lower() == animal_taxon_name:
-            animal_taxon = taxon
-            special_taxon = True
+            special_taxon = False
-        elif taxon['commonNameEnglish'].strip().lower() == unknown_taxon_name:
-            unknown_taxon = taxon
-            special_taxon = True
+            # Look for keywords that don't refer to specific taxa: blank/animal/unknown
+            if taxon['commonNameEnglish'].strip().lower() == blank_taxon_name:
+                blank_taxon = taxon
+                special_taxon = True
-        if special_taxon:
-            taxon_name = taxon['commonNameEnglish'].strip().lower()
-            taxon['taxon_name'] = taxon_name
-            wi_taxon_name_to_taxa[taxon_name].append(taxon)
-            continue
+            elif taxon['commonNameEnglish'].strip().lower() == animal_taxon_name:
+                animal_taxon = taxon
+                special_taxon = True
-    # Do we have a species name?
-    if not is_empty_wi_item(taxon['species']):
+            elif taxon['commonNameEnglish'].strip().lower() == unknown_taxon_name:
+                unknown_taxon = taxon
+                special_taxon = True
-        # If 'species' is populated, 'genus' should always be populated; one item currently breaks
-        # this rule.
-        assert not is_empty_wi_item(taxon['genus'])
-        taxon_name = (taxon['genus'].strip() + ' ' +
-                      taxon['species'].strip()).strip().lower()
-        assert not is_empty_wi_item(taxon['class']) and \
-            not is_empty_wi_item(taxon['order']) and \
-            not is_empty_wi_item(taxon['family'])
+            if special_taxon:
+                taxon_name = taxon['commonNameEnglish'].strip().lower()
+                taxon['taxon_name'] = taxon_name
+                wi_taxon_name_to_taxa[taxon_name].append(taxon)
+                continue
+        # Do we have a species name?
+        if not is_empty_wi_item(taxon['species']):
-    elif not is_empty_wi_item(taxon['genus']):
+            # If 'species' is populated, 'genus' should always be populated; one item currently breaks
+            # this rule.
+            assert not is_empty_wi_item(taxon['genus'])
+            taxon_name = (taxon['genus'].strip() + ' ' +
+                        taxon['species'].strip()).strip().lower()
+            assert not is_empty_wi_item(taxon['class']) and \
+                not is_empty_wi_item(taxon['order']) and \
+                not is_empty_wi_item(taxon['family'])
-        assert not is_empty_wi_item(taxon['class']) and \
-            not is_empty_wi_item(taxon['order']) and \
-            not is_empty_wi_item(taxon['family'])
-        taxon_name = taxon['genus'].strip().lower()
+        elif not is_empty_wi_item(taxon['genus']):
-    elif not is_empty_wi_item(taxon['family']):
+            assert not is_empty_wi_item(taxon['class']) and \
+                not is_empty_wi_item(taxon['order']) and \
+                not is_empty_wi_item(taxon['family'])
+            taxon_name = taxon['genus'].strip().lower()
-        assert not is_empty_wi_item(taxon['class']) and \
-            not is_empty_wi_item(taxon['order'])
-        taxon_name = taxon['family'].strip().lower()
+        elif not is_empty_wi_item(taxon['family']):
-    elif not is_empty_wi_item(taxon['order']):
+            assert not is_empty_wi_item(taxon['class']) and \
+                not is_empty_wi_item(taxon['order'])
+            taxon_name = taxon['family'].strip().lower()
-        assert not is_empty_wi_item(taxon['class'])
-        taxon_name = taxon['order'].strip().lower()
+        elif not is_empty_wi_item(taxon['order']):
-    elif not is_empty_wi_item(taxon['class']):
+            assert not is_empty_wi_item(taxon['class'])
+            taxon_name = taxon['order'].strip().lower()
-        taxon_name = taxon['class'].strip().lower()
+        elif not is_empty_wi_item(taxon['class']):
-    if taxon_name is not None:
-        assert taxon['taxonomyType'] == 'biological'
-    else:
-        assert taxon['taxonomyType'] == 'object'
-        taxon_name = taxon['commonNameEnglish'].strip().lower()
+            taxon_name = taxon['class'].strip().lower()
-    if taxon_name in wi_taxon_name_to_taxa:
-        if taxon[id_column] in known_problematic_taxon_ids:
-            print('Skipping problematic taxon ID {}'.format(taxon[id_column]))
+        if taxon_name is not None:
+            assert taxon['taxonomyType'] == 'biological'
         else:
-            previous_taxa = wi_taxon_name_to_taxa[taxon_name]
-            for previous_taxon in previous_taxa:
-                for level in ['class', 'order', 'family', 'genus', 'species']:
-                    error_string = 'Error: taxon {} appeared previously in {} {} (as {}), now in {} {}'.format(
-                        taxon_name,
-                        level,previous_taxon[level],
-                        previous_taxon['taxon_name'],
-                        level,taxon[level])
-                    assert taxonomy_items_equal(previous_taxon[level], taxon[level]), error_string
-    taxon['taxon_name'] = taxon_name
-    if taxon_name == 'homo sapiens':
-        human_taxa.append(taxon)
-    wi_taxon_name_to_taxa[taxon_name].append(taxon)
+            assert taxon['taxonomyType'] == 'object'
+            taxon_name = taxon['commonNameEnglish'].strip().lower()
-# ...for each taxon
+        if taxon_name in wi_taxon_name_to_taxa:
+            if taxon[id_column] in known_problematic_taxon_ids:
+                print('Skipping problematic taxon ID {}'.format(taxon[id_column]))
+            else:
+                previous_taxa = wi_taxon_name_to_taxa[taxon_name]
+                for previous_taxon in previous_taxa:
+                    for level in ['class', 'order', 'family', 'genus', 'species']:
+                        error_string = 'Error: taxon {} appeared previously in {} {} (as {}), now in {} {}'.format(
+                            taxon_name,
+                            level,previous_taxon[level],
+                            previous_taxon['taxon_name'],
+                            level,taxon[level])
+                        assert taxonomy_items_equal(previous_taxon[level], taxon[level]), error_string
+        taxon['taxon_name'] = taxon_name
+        if taxon_name == 'homo sapiens':
+            human_taxa.append(taxon)
+        wi_taxon_name_to_taxa[taxon_name].append(taxon)
-assert unknown_taxon is not None
-assert animal_taxon is not None
-assert blank_taxon is not None
+    # ...for each taxon
+    assert unknown_taxon is not None
+    assert animal_taxon is not None
+    assert blank_taxon is not None
-#%% Find redundant taxa
-taxon_names_with_multiple_entries = []
-for wi_taxon_name in wi_taxon_name_to_taxa:
-    if len(wi_taxon_name_to_taxa[wi_taxon_name]) > 1:
-        taxon_names_with_multiple_entries.append(wi_taxon_name)
+    #%% Find redundant taxa
-print('{} names have multiple entries\n:'.format(len(taxon_names_with_multiple_entries)))
+    taxon_names_with_multiple_entries = []
+    for wi_taxon_name in wi_taxon_name_to_taxa:
+        if len(wi_taxon_name_to_taxa[wi_taxon_name]) > 1:
+            taxon_names_with_multiple_entries.append(wi_taxon_name)
-for s in taxon_names_with_multiple_entries:
-    print(s)
+    print('{} names have multiple entries\n:'.format(len(taxon_names_with_multiple_entries)))
-if False:
-    pass
+    for s in taxon_names_with_multiple_entries:
+        print(s)
-    #%% Manual review of redundant taxa
-    s = taxon_names_with_multiple_entries[15]
-    taxa = wi_taxon_name_to_taxa[s]
-    for t in taxa:
-        for k in t.keys():
-            print('{}: {}'.format(k,t[k]))
-        print()
-        # print(t,end='\n\n')
+    if False:
+        pass
+        #%% Manual review of redundant taxa
+        s = taxon_names_with_multiple_entries[15]
+        taxa = wi_taxon_name_to_taxa[s]
+        for t in taxa:
+            for k in t.keys():
+                print('{}: {}'.format(k,t[k]))
+            print()
+            # print(t,end='\n\n')
-#%% Clean up redundant taxa
-taxon_name_to_preferred_taxon_id = {}
+    #%% Clean up redundant taxa
-# "helmeted guineafowl" vs "domestic guineafowl"
-taxon_name_to_preferred_taxon_id['numida meleagris'] = '83133617-8358-4910-82ee-4c23e40ba3dc' # 2005826
+    taxon_name_to_preferred_taxon_id = {}
-# "domestic turkey" vs. "wild turkey"
-taxon_name_to_preferred_taxon_id['meleagris gallopavo'] = 'c10547c3-1748-48bf-a451-8066c820f22f' # 2021598
+    # "helmeted guineafowl" vs "domestic guineafowl"
+    taxon_name_to_preferred_taxon_id['numida meleagris'] = '83133617-8358-4910-82ee-4c23e40ba3dc' # 2005826
-# multiple sensible human entries
-taxon_name_to_preferred_taxon_id['homo sapiens'] = '990ae9dd-7a59-4344-afcb-1b7b21368000' # 2002045
+    # "domestic turkey" vs. "wild turkey"
+    taxon_name_to_preferred_taxon_id['meleagris gallopavo'] = 'c10547c3-1748-48bf-a451-8066c820f22f' # 2021598
-# "domestic dog" and "dog-on-leash"
-taxon_name_to_preferred_taxon_id['canis familiaris'] = '3d80f1d6-b1df-4966-9ff4-94053c7a902a' # 2021548
+    # multiple sensible human entries
+    taxon_name_to_preferred_taxon_id['homo sapiens'] = '990ae9dd-7a59-4344-afcb-1b7b21368000' # 2002045
-# "small mammal" vs. "mammal"
-taxon_name_to_preferred_taxon_id['mammalia'] = 'f2d233e3-80e3-433d-9687-e29ecc7a467a' # 2021108
+    # "domestic dog" and "dog-on-leash"
+    taxon_name_to_preferred_taxon_id['canis familiaris'] = '3d80f1d6-b1df-4966-9ff4-94053c7a902a' # 2021548
-# "Hispaniolan Mango" vs. NaN
-taxon_name_to_preferred_taxon_id['anthracothorax dominicus'] = 'f94e6d97-59cf-4d38-a05a-a75efdd2863b'
+    # "small mammal" vs. "mammal"
+    taxon_name_to_preferred_taxon_id['mammalia'] = 'f2d233e3-80e3-433d-9687-e29ecc7a467a' # 2021108
-# "millipedes" vs. "Millipede"
-taxon_name_to_preferred_taxon_id['diplopoda'] =  '065884eb-4e64-4233-84dc-de25bd06ffd2' # 2021760
+    # "Hispaniolan Mango" vs. NaN
+    taxon_name_to_preferred_taxon_id['anthracothorax dominicus'] = 'f94e6d97-59cf-4d38-a05a-a75efdd2863b'
-# Different suborders: Squamata vs. Lacertilia
-taxon_name_to_preferred_taxon_id['squamata'] = '710c4066-bd5d-4313-bcf4-0217c4c84da7' # 2021703
+    # "millipedes" vs. "Millipede"
+    taxon_name_to_preferred_taxon_id['diplopoda'] =  '065884eb-4e64-4233-84dc-de25bd06ffd2' # 2021760
-# Redundancy (both "beautiful firetail")
-taxon_name_to_preferred_taxon_id['stagonopleura bella'] = '7fec8e7e-fd3b-4d7f-99fd-3ade6f3bbaa5' # 2021939
+    # Different suborders: Squamata vs. Lacertilia
+    taxon_name_to_preferred_taxon_id['squamata'] = '710c4066-bd5d-4313-bcf4-0217c4c84da7' # 2021703
-# "yellow wagtail" vs. "yellow crowned-wagtail"
-taxon_name_to_preferred_taxon_id['motacilla flava'] = 'ac6669bc-9f9e-4473-b609-b9082f9bf50c' # 2016194
+    # Redundancy (both "beautiful firetail")
+    taxon_name_to_preferred_taxon_id['stagonopleura bella'] = '7fec8e7e-fd3b-4d7f-99fd-3ade6f3bbaa5' # 2021939
-# "dremomys species" vs. "dremomys genus"
-taxon_name_to_preferred_taxon_id['dremomys'] = '1507d153-af11-46f1-bfb8-77918d035ab3' # 2019370
+    # "yellow wagtail" vs. "yellow crowned-wagtail"
+    taxon_name_to_preferred_taxon_id['motacilla flava'] = 'ac6669bc-9f9e-4473-b609-b9082f9bf50c' # 2016194
-# "elk" vs. "domestic elk"
-taxon_name_to_preferred_taxon_id['cervus canadensis'] = 'c5ce946f-8f0d-4379-992b-cc0982381f5e'
+    # "dremomys species" vs. "dremomys genus"
+    taxon_name_to_preferred_taxon_id['dremomys'] = '1507d153-af11-46f1-bfb8-77918d035ab3' # 2019370
-# "American bison" vs. "domestic bison"
-taxon_name_to_preferred_taxon_id['bison bison'] = '539ebd55-081b-429a-9ae6-5a6a0f6999d4' # 2021593
+    # "elk" vs. "domestic elk"
+    taxon_name_to_preferred_taxon_id['cervus canadensis'] = 'c5ce946f-8f0d-4379-992b-cc0982381f5e'
-# "woodrat or rat or mouse species" vs. "mouse species"
-taxon_name_to_preferred_taxon_id['muridae'] = 'e7503287-468c-45af-a1bd-a17821bb62f2' # 2021642
+    # "American bison" vs. "domestic bison"
+    taxon_name_to_preferred_taxon_id['bison bison'] = '539ebd55-081b-429a-9ae6-5a6a0f6999d4' # 2021593
-# both "southern sand frog"
-taxon_name_to_preferred_taxon_id['tomopterna adiastola'] = 'a5dc63cb-41be-4090-84a7-b944b16dcee4' # 2021834
+    # "woodrat or rat or mouse species" vs. "mouse species"
+    taxon_name_to_preferred_taxon_id['muridae'] = 'e7503287-468c-45af-a1bd-a17821bb62f2' # 2021642
-# sericornis species vs. scrubwren species
-taxon_name_to_preferred_taxon_id['sericornis'] = 'ad82c0ac-df48-4028-bf71-d2b2f4bc4129' # 2021776
+    # both "southern sand frog"
+    taxon_name_to_preferred_taxon_id['tomopterna adiastola'] = 'a5dc63cb-41be-4090-84a7-b944b16dcee4' # 2021834
-# taxon_name = list(taxon_name_to_preferred_taxon_id.keys())[0]
-for taxon_name in taxon_name_to_preferred_taxon_id.keys():
-    candidate_taxa = wi_taxon_name_to_taxa[taxon_name]
-    # If we've gotten this far, we should be choosing from multiple taxa.
-    #
-    # This will become untrue if any of these are resolved later, at which point we should
-    # remove them from taxon_name_to_preferred_id
-    assert len(candidate_taxa) > 1, 'Only one taxon available for {}'.format(taxon_name)
-    # Choose the preferred taxa
-    selected_taxa = [t for t in candidate_taxa if t[id_column] == \
-                     taxon_name_to_preferred_taxon_id[taxon_name]]
-    assert len(selected_taxa) == 1
-    wi_taxon_name_to_taxa[taxon_name] = selected_taxa
+    # sericornis species vs. scrubwren species
+    taxon_name_to_preferred_taxon_id['sericornis'] = 'ad82c0ac-df48-4028-bf71-d2b2f4bc4129' # 2021776
+    # taxon_name = list(taxon_name_to_preferred_taxon_id.keys())[0]
+    for taxon_name in taxon_name_to_preferred_taxon_id.keys():
+        candidate_taxa = wi_taxon_name_to_taxa[taxon_name]
+        # If we've gotten this far, we should be choosing from multiple taxa.
+        #
+        # This will become untrue if any of these are resolved later, at which point we should
+        # remove them from taxon_name_to_preferred_id
+        assert len(candidate_taxa) > 1, 'Only one taxon available for {}'.format(taxon_name)
+        # Choose the preferred taxa
+        selected_taxa = [t for t in candidate_taxa if t[id_column] == \
+                        taxon_name_to_preferred_taxon_id[taxon_name]]
+        assert len(selected_taxa) == 1
+        wi_taxon_name_to_taxa[taxon_name] = selected_taxa
-wi_taxon_name_to_taxon = {}
+    wi_taxon_name_to_taxon = {}
-for taxon_name in wi_taxon_name_to_taxa.keys():
-    taxa = wi_taxon_name_to_taxa[taxon_name]
-    assert len(taxa) == 1
-    wi_taxon_name_to_taxon[taxon_name] = taxa[0]
+    for taxon_name in wi_taxon_name_to_taxa.keys():
+        taxa = wi_taxon_name_to_taxa[taxon_name]
+        assert len(taxa) == 1
+        wi_taxon_name_to_taxon[taxon_name] = taxa[0]
-#%% Read supplementary mappings
+    #%% Read supplementary mappings
-with open(lila_to_wi_supplementary_mapping_file, 'r') as f:
-    lines = f.readlines()
+    with open(lila_to_wi_supplementary_mapping_file, 'r') as f:
+        lines = f.readlines()
-supplementary_lila_query_to_wi_query = {}
+    supplementary_lila_query_to_wi_query = {}
-for line in lines:
-    # Each line is [lila query],[WI taxon name],[notes]
-    tokens = line.strip().split(',')
-    assert len(tokens) == 3
-    lila_query = tokens[0].strip().lower()
-    wi_taxon_name = tokens[1].strip().lower()
-    assert wi_taxon_name in wi_taxon_name_to_taxa
-    supplementary_lila_query_to_wi_query[lila_query] = wi_taxon_name
+    for line in lines:
+        # Each line is [lila query],[WI taxon name],[notes]
+        tokens = line.strip().split(',')
+        assert len(tokens) == 3
+        lila_query = tokens[0].strip().lower()
+        wi_taxon_name = tokens[1].strip().lower()
+        assert wi_taxon_name in wi_taxon_name_to_taxa
+        supplementary_lila_query_to_wi_query[lila_query] = wi_taxon_name
-#%% Map LILA categories to WI categories
+    #%% Map LILA categories to WI categories
-mismatches = set()
-mismatches_with_common_mappings = set()
-supplementary_mappings = set()
+    mismatches = set()
+    mismatches_with_common_mappings = set()
+    supplementary_mappings = set()
-all_searches = set()
+    all_searches = set()
-# Must be ordered from kingdom --> species
-lila_taxonomy_levels = ['kingdom', 'phylum', 'subphylum', 'superclass', 'class', 'subclass',
-                        'infraclass', 'superorder', 'order', 'suborder', 'infraorder',
-                        'superfamily', 'family', 'subfamily', 'tribe', 'genus', 'species']
+    # Must be ordered from kingdom --> species
+    lila_taxonomy_levels = ['kingdom', 'phylum', 'subphylum', 'superclass', 'class', 'subclass',
+                            'infraclass', 'superorder', 'order', 'suborder', 'infraorder',
+                            'superfamily', 'family', 'subfamily', 'tribe', 'genus', 'species']
-unknown_queries = set(
-    ['unidentifiable', 'other', 'unidentified', 'unknown', 'unclassifiable'])
-blank_queries = set(['empty'])
-animal_queries = set(['animalia'])
+    unknown_queries = set(
+        ['unidentifiable', 'other', 'unidentified', 'unknown', 'unclassifiable'])
+    blank_queries = set(['empty'])
+    animal_queries = set(['animalia'])
-lila_dataset_category_to_wi_taxon = {}
+    lila_dataset_category_to_wi_taxon = {}
-# i_taxon = 0; taxon = lila_taxonomy[i_taxon]; print(taxon)
-for i_taxon, lila_taxon in enumerate(lila_taxonomy):
+    # i_taxon = 0; taxon = lila_taxonomy[i_taxon]; print(taxon)
+    for i_taxon, lila_taxon in enumerate(lila_taxonomy):
-    query = None
+        query = None
-    lila_dataset_category = lila_taxon['dataset_name'] + ':' + lila_taxon['query']
-    # Go from kingdom --> species, choosing the lowest-level description as the query
-    for level in lila_taxonomy_levels:
-        if isinstance(lila_taxon[level], str):
-            query = lila_taxon[level]
-            all_searches.add(query)
+        lila_dataset_category = lila_taxon['dataset_name'] + ':' + lila_taxon['query']
+        # Go from kingdom --> species, choosing the lowest-level description as the query
+        for level in lila_taxonomy_levels:
+            if isinstance(lila_taxon[level], str):
+                query = lila_taxon[level]
+                all_searches.add(query)
-    if query is None:
-        # E.g., 'car'
-        query = lila_taxon['query']
+        if query is None:
+            # E.g., 'car'
+            query = lila_taxon['query']
-    wi_taxon = None
+        wi_taxon = None
-    if query in unknown_queries:
+        if query in unknown_queries:
-        wi_taxon = unknown_taxon
+            wi_taxon = unknown_taxon
-    elif query in blank_queries:
+        elif query in blank_queries:
-        wi_taxon = blank_taxon
+            wi_taxon = blank_taxon
-    elif query in animal_queries:
+        elif query in animal_queries:
-        wi_taxon = animal_taxon
+            wi_taxon = animal_taxon
-    elif query in wi_taxon_name_to_taxon:
+        elif query in wi_taxon_name_to_taxon:
-        wi_taxon = wi_taxon_name_to_taxon[query]
+            wi_taxon = wi_taxon_name_to_taxon[query]
-    elif query in supplementary_lila_query_to_wi_query:
+        elif query in supplementary_lila_query_to_wi_query:
-        wi_taxon = wi_taxon_name_to_taxon[supplementary_lila_query_to_wi_query[query]]
-        supplementary_mappings.add(query)
-        # print('Made a supplementary mapping from {} to {}'.format(query,wi_taxon['taxon_name']))
+            wi_taxon = wi_taxon_name_to_taxon[supplementary_lila_query_to_wi_query[query]]
+            supplementary_mappings.add(query)
+            # print('Made a supplementary mapping from {} to {}'.format(query,wi_taxon['taxon_name']))
-    else:
+        else:
-        # print('No match for {}'.format(query))
-        lila_common_name = lila_taxon['common_name']
+            # print('No match for {}'.format(query))
+            lila_common_name = lila_taxon['common_name']
-        if lila_common_name in wi_common_name_to_taxon:
-            wi_taxon = wi_common_name_to_taxon[lila_common_name]
-            wi_common_name = wi_taxon['commonNameEnglish']
-            wi_taxon_name = wi_taxon['taxon_name']
-            if False:
-                print('LILA common name {} maps to WI taxon {} ({})'.format(lila_common_name,
-                                                                            wi_taxon_name,
-                                                                            wi_common_name))
-            mismatches_with_common_mappings.add(query)
+            if lila_common_name in wi_common_name_to_taxon:
+                wi_taxon = wi_common_name_to_taxon[lila_common_name]
+                wi_common_name = wi_taxon['commonNameEnglish']
+                wi_taxon_name = wi_taxon['taxon_name']
+                if False:
+                    print('LILA common name {} maps to WI taxon {} ({})'.format(lila_common_name,
+                                                                                wi_taxon_name,
+                                                                                wi_common_name))
+                mismatches_with_common_mappings.add(query)
-        else:
+            else:
-            mismatches.add(query)
+                mismatches.add(query)
-    lila_dataset_category_to_wi_taxon[lila_dataset_category] = wi_taxon
+        lila_dataset_category_to_wi_taxon[lila_dataset_category] = wi_taxon
-# ...for each LILA taxon
+    # ...for each LILA taxon
-print('Of {} entities, there are {} mismatches ({} mapped by common name) ({} mapped by supplementary mapping file)'.format(
-    len(all_searches), len(mismatches), len(mismatches_with_common_mappings), len(supplementary_mappings)))
+    print('Of {} entities, there are {} mismatches ({} mapped by common name) ({} mapped by supplementary mapping file)'.format(
+        len(all_searches), len(mismatches), len(mismatches_with_common_mappings), len(supplementary_mappings)))
-assert len(mismatches) == 0
+    assert len(mismatches) == 0
-#%% Manual mapping
+    #%% Manual mapping
-if not os.path.isfile(lila_to_wi_supplementary_mapping_file):
-    print('Creating mapping file {}'.format(
-        lila_to_wi_supplementary_mapping_file))
-    with open(lila_to_wi_supplementary_mapping_file, 'w') as f:
-        for query in mismatches:
-            f.write(query + ',' + '\n')
-else:
-    print('{} exists, not re-writing'.format(lila_to_wi_supplementary_mapping_file))
+    if not os.path.isfile(lila_to_wi_supplementary_mapping_file):
+        print('Creating mapping file {}'.format(
+            lila_to_wi_supplementary_mapping_file))
+        with open(lila_to_wi_supplementary_mapping_file, 'w') as f:
+            for query in mismatches:
+                f.write(query + ',' + '\n')
+    else:
+        print('{} exists, not re-writing'.format(lila_to_wi_supplementary_mapping_file))
-#%% Build a dictionary from LILA dataset names and categories to LILA taxa
+    #%% Build a dictionary from LILA dataset names and categories to LILA taxa
-lila_dataset_category_to_lila_taxon = {}
+    lila_dataset_category_to_lila_taxon = {}
-# i_d = 0; d = lila_taxonomy[i_d]
-for i_d,d in enumerate(lila_taxonomy):
-    lila_dataset_category = d['dataset_name'] + ':' + d['query']
-    assert lila_dataset_category not in lila_dataset_category_to_lila_taxon
-    lila_dataset_category_to_lila_taxon[lila_dataset_category] = d
+    # i_d = 0; d = lila_taxonomy[i_d]
+    for i_d,d in enumerate(lila_taxonomy):
+        lila_dataset_category = d['dataset_name'] + ':' + d['query']
+        assert lila_dataset_category not in lila_dataset_category_to_lila_taxon
+        lila_dataset_category_to_lila_taxon[lila_dataset_category] = d
-#%% Map LILA datasets to WI taxa, and count the number of each taxon available in each dataset
+    #%% Map LILA datasets to WI taxa, and count the number of each taxon available in each dataset
-with open(wi_mapping_table_file,'w') as f:
-    f.write('lila_dataset_name,lila_category_name,wi_guid,wi_taxon_name,wi_common,count\n')
-    # dataset_name = list(lila_dataset_to_categories.keys())[0]
-    for dataset_name in lila_dataset_to_categories.keys():
+    with open(wi_mapping_table_file,'w') as f:
-        if '_bbox' in dataset_name:
-            continue
-        dataset_categories = lila_dataset_to_categories[dataset_name]
+        f.write('lila_dataset_name,lila_category_name,wi_guid,wi_taxon_name,wi_common,count\n')
-        # dataset_category = dataset_categories[0]
-        for category in dataset_categories:
+        # dataset_name = list(lila_dataset_to_categories.keys())[0]
+        for dataset_name in lila_dataset_to_categories.keys():
-            lila_dataset_category = dataset_name + ':' + category['name'].strip().lower()
-            if '#' in lila_dataset_category:
+            if '_bbox' in dataset_name:
                 continue
-            assert lila_dataset_category in lila_dataset_category_to_lila_taxon
-            assert lila_dataset_category in lila_dataset_category_to_wi_taxon
-            assert 'count' in category
-            wi_taxon = lila_dataset_category_to_wi_taxon[lila_dataset_category]
-            # Write out the dataset name, category name, WI GUID, WI scientific name, WI common name,
-            # and count
-            s = f"{dataset_name},{category['name']},{wi_taxon['uniqueIdentifier']},"+\
-                f"{wi_taxon['taxon_name']},{wi_taxon['commonNameEnglish']},{category['count']}\n"
-            f.write(s)
+            dataset_categories = lila_dataset_to_categories[dataset_name]
-        # ...for each category in this dataset
-    # ...for each dataset
+            # dataset_category = dataset_categories[0]
+            for category in dataset_categories:
+                lila_dataset_category = dataset_name + ':' + category['name'].strip().lower()
+                if '#' in lila_dataset_category:
+                    continue
+                assert lila_dataset_category in lila_dataset_category_to_lila_taxon
+                assert lila_dataset_category in lila_dataset_category_to_wi_taxon
+                assert 'count' in category
+                wi_taxon = lila_dataset_category_to_wi_taxon[lila_dataset_category]
+                # Write out the dataset name, category name, WI GUID, WI scientific name, WI common name,
+                # and count
+                s = f"{dataset_name},{category['name']},{wi_taxon['uniqueIdentifier']},"+\
+                    f"{wi_taxon['taxon_name']},{wi_taxon['commonNameEnglish']},{category['count']}\n"
+                f.write(s)
+            # ...for each category in this dataset
+        # ...for each dataset
-# ...with open()
+    # ...with open()

megadetector 5.0.8__py3-none-any.whl → 5.0.9__py3-none-any.whl

Potentially problematic release.

megadetector 5.0.8py3-none-any.whl → 5.0.9py3-none-any.whl