megadetector-5.0.8-py3-none-any.whl → megadetector-5.0.10-py3-none-any.whl

This diff represents the contents of publicly released package versions as they appear in their respective public registries; it is provided for informational purposes only.

Potentially problematic release: this version of megadetector might be problematic.
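
For reference, a diff like this one can be reproduced locally: wheels are ordinary zip archives, so both releases can be downloaded, unpacked, and compared directly. A minimal sketch (directory names are arbitrary; assumes pip is on the PATH):

import subprocess, zipfile, pathlib, filecmp

work = pathlib.Path('md_diff')
for version in ('5.0.8', '5.0.10'):
    dest = work / version
    dest.mkdir(parents=True, exist_ok=True)
    # 'pip download --no-deps' fetches just the megadetector wheel
    subprocess.run(['pip', 'download', 'megadetector==' + version,
                    '--no-deps', '-d', str(dest)], check=True)
    # A .whl file is a zip archive; unpack it for comparison
    with zipfile.ZipFile(next(dest.glob('*.whl'))) as z:
        z.extractall(dest / 'unpacked')

# Recursively report files that differ or exist on only one side
filecmp.dircmp(str(work / '5.0.8' / 'unpacked'),
               str(work / '5.0.10' / 'unpacked')).report_full_closure()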

Files changed (190)
  1. api/__init__.py +0 -0
  2. api/batch_processing/__init__.py +0 -0
  3. api/batch_processing/api_core/__init__.py +0 -0
  4. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. api/batch_processing/api_core/batch_service/score.py +0 -1
  6. api/batch_processing/api_core/server_job_status_table.py +0 -1
  7. api/batch_processing/api_core_support/__init__.py +0 -0
  8. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
  9. api/batch_processing/api_support/__init__.py +0 -0
  10. api/batch_processing/api_support/summarize_daily_activity.py +0 -1
  11. api/batch_processing/data_preparation/__init__.py +0 -0
  12. api/batch_processing/data_preparation/manage_local_batch.py +65 -65
  13. api/batch_processing/data_preparation/manage_video_batch.py +8 -8
  14. api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
  15. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
  16. api/batch_processing/postprocessing/__init__.py +0 -0
  17. api/batch_processing/postprocessing/add_max_conf.py +12 -12
  18. api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
  19. api/batch_processing/postprocessing/combine_api_outputs.py +68 -54
  20. api/batch_processing/postprocessing/compare_batch_results.py +113 -43
  21. api/batch_processing/postprocessing/convert_output_format.py +41 -16
  22. api/batch_processing/postprocessing/load_api_results.py +16 -17
  23. api/batch_processing/postprocessing/md_to_coco.py +31 -21
  24. api/batch_processing/postprocessing/md_to_labelme.py +52 -22
  25. api/batch_processing/postprocessing/merge_detections.py +14 -14
  26. api/batch_processing/postprocessing/postprocess_batch_results.py +246 -174
  27. api/batch_processing/postprocessing/remap_detection_categories.py +32 -25
  28. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +60 -27
  29. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
  30. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
  31. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +242 -158
  32. api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
  33. api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
  34. api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
  35. api/synchronous/__init__.py +0 -0
  36. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  37. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
  38. api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
  39. api/synchronous/api_core/animal_detection_api/config.py +35 -35
  40. api/synchronous/api_core/tests/__init__.py +0 -0
  41. api/synchronous/api_core/tests/load_test.py +109 -109
  42. classification/__init__.py +0 -0
  43. classification/aggregate_classifier_probs.py +21 -24
  44. classification/analyze_failed_images.py +11 -13
  45. classification/cache_batchapi_outputs.py +51 -51
  46. classification/create_classification_dataset.py +69 -68
  47. classification/crop_detections.py +54 -53
  48. classification/csv_to_json.py +97 -100
  49. classification/detect_and_crop.py +105 -105
  50. classification/evaluate_model.py +43 -42
  51. classification/identify_mislabeled_candidates.py +47 -46
  52. classification/json_to_azcopy_list.py +10 -10
  53. classification/json_validator.py +72 -71
  54. classification/map_classification_categories.py +44 -43
  55. classification/merge_classification_detection_output.py +68 -68
  56. classification/prepare_classification_script.py +157 -154
  57. classification/prepare_classification_script_mc.py +228 -228
  58. classification/run_classifier.py +27 -26
  59. classification/save_mislabeled.py +30 -30
  60. classification/train_classifier.py +20 -20
  61. classification/train_classifier_tf.py +21 -22
  62. classification/train_utils.py +10 -10
  63. data_management/__init__.py +0 -0
  64. data_management/annotations/__init__.py +0 -0
  65. data_management/annotations/annotation_constants.py +18 -31
  66. data_management/camtrap_dp_to_coco.py +238 -0
  67. data_management/cct_json_utils.py +102 -59
  68. data_management/cct_to_md.py +176 -158
  69. data_management/cct_to_wi.py +247 -219
  70. data_management/coco_to_labelme.py +272 -263
  71. data_management/coco_to_yolo.py +79 -58
  72. data_management/databases/__init__.py +0 -0
  73. data_management/databases/add_width_and_height_to_db.py +20 -16
  74. data_management/databases/combine_coco_camera_traps_files.py +35 -31
  75. data_management/databases/integrity_check_json_db.py +62 -24
  76. data_management/databases/subset_json_db.py +24 -15
  77. data_management/generate_crops_from_cct.py +27 -45
  78. data_management/get_image_sizes.py +188 -162
  79. data_management/importers/add_nacti_sizes.py +8 -8
  80. data_management/importers/add_timestamps_to_icct.py +78 -78
  81. data_management/importers/animl_results_to_md_results.py +158 -158
  82. data_management/importers/auckland_doc_test_to_json.py +9 -9
  83. data_management/importers/auckland_doc_to_json.py +8 -8
  84. data_management/importers/awc_to_json.py +7 -7
  85. data_management/importers/bellevue_to_json.py +15 -15
  86. data_management/importers/cacophony-thermal-importer.py +13 -13
  87. data_management/importers/carrizo_shrubfree_2018.py +8 -8
  88. data_management/importers/carrizo_trail_cam_2017.py +8 -8
  89. data_management/importers/cct_field_adjustments.py +9 -9
  90. data_management/importers/channel_islands_to_cct.py +10 -10
  91. data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
  92. data_management/importers/ena24_to_json.py +7 -7
  93. data_management/importers/filenames_to_json.py +8 -8
  94. data_management/importers/helena_to_cct.py +7 -7
  95. data_management/importers/idaho-camera-traps.py +7 -7
  96. data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
  97. data_management/importers/jb_csv_to_json.py +9 -9
  98. data_management/importers/mcgill_to_json.py +8 -8
  99. data_management/importers/missouri_to_json.py +18 -18
  100. data_management/importers/nacti_fieldname_adjustments.py +10 -10
  101. data_management/importers/noaa_seals_2019.py +7 -7
  102. data_management/importers/pc_to_json.py +7 -7
  103. data_management/importers/plot_wni_giraffes.py +7 -7
  104. data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
  105. data_management/importers/prepare_zsl_imerit.py +7 -7
  106. data_management/importers/rspb_to_json.py +8 -8
  107. data_management/importers/save_the_elephants_survey_A.py +8 -8
  108. data_management/importers/save_the_elephants_survey_B.py +9 -9
  109. data_management/importers/snapshot_safari_importer.py +26 -26
  110. data_management/importers/snapshot_safari_importer_reprise.py +665 -665
  111. data_management/importers/snapshot_serengeti_lila.py +14 -14
  112. data_management/importers/sulross_get_exif.py +8 -9
  113. data_management/importers/timelapse_csv_set_to_json.py +11 -11
  114. data_management/importers/ubc_to_json.py +13 -13
  115. data_management/importers/umn_to_json.py +7 -7
  116. data_management/importers/wellington_to_json.py +8 -8
  117. data_management/importers/wi_to_json.py +9 -9
  118. data_management/importers/zamba_results_to_md_results.py +181 -181
  119. data_management/labelme_to_coco.py +65 -24
  120. data_management/labelme_to_yolo.py +8 -8
  121. data_management/lila/__init__.py +0 -0
  122. data_management/lila/add_locations_to_island_camera_traps.py +9 -9
  123. data_management/lila/add_locations_to_nacti.py +147 -147
  124. data_management/lila/create_lila_blank_set.py +13 -13
  125. data_management/lila/create_lila_test_set.py +8 -8
  126. data_management/lila/create_links_to_md_results_files.py +106 -106
  127. data_management/lila/download_lila_subset.py +44 -110
  128. data_management/lila/generate_lila_per_image_labels.py +55 -42
  129. data_management/lila/get_lila_annotation_counts.py +18 -15
  130. data_management/lila/get_lila_image_counts.py +11 -11
  131. data_management/lila/lila_common.py +96 -33
  132. data_management/lila/test_lila_metadata_urls.py +132 -116
  133. data_management/ocr_tools.py +173 -128
  134. data_management/read_exif.py +110 -97
  135. data_management/remap_coco_categories.py +83 -83
  136. data_management/remove_exif.py +58 -62
  137. data_management/resize_coco_dataset.py +30 -23
  138. data_management/wi_download_csv_to_coco.py +246 -239
  139. data_management/yolo_output_to_md_output.py +86 -73
  140. data_management/yolo_to_coco.py +300 -60
  141. detection/__init__.py +0 -0
  142. detection/detector_training/__init__.py +0 -0
  143. detection/process_video.py +85 -33
  144. detection/pytorch_detector.py +43 -25
  145. detection/run_detector.py +157 -72
  146. detection/run_detector_batch.py +179 -113
  147. detection/run_inference_with_yolov5_val.py +108 -48
  148. detection/run_tiled_inference.py +111 -40
  149. detection/tf_detector.py +51 -29
  150. detection/video_utils.py +606 -521
  151. docs/source/conf.py +43 -0
  152. md_utils/__init__.py +0 -0
  153. md_utils/azure_utils.py +9 -9
  154. md_utils/ct_utils.py +228 -68
  155. md_utils/directory_listing.py +59 -64
  156. md_utils/md_tests.py +968 -871
  157. md_utils/path_utils.py +460 -134
  158. md_utils/process_utils.py +157 -133
  159. md_utils/sas_blob_utils.py +20 -20
  160. md_utils/split_locations_into_train_val.py +45 -32
  161. md_utils/string_utils.py +33 -10
  162. md_utils/url_utils.py +176 -60
  163. md_utils/write_html_image_list.py +40 -33
  164. md_visualization/__init__.py +0 -0
  165. md_visualization/plot_utils.py +102 -109
  166. md_visualization/render_images_with_thumbnails.py +34 -34
  167. md_visualization/visualization_utils.py +597 -291
  168. md_visualization/visualize_db.py +76 -48
  169. md_visualization/visualize_detector_output.py +61 -42
  170. {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/METADATA +13 -7
  171. megadetector-5.0.10.dist-info/RECORD +224 -0
  172. {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/top_level.txt +1 -0
  173. taxonomy_mapping/__init__.py +0 -0
  174. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
  175. taxonomy_mapping/map_new_lila_datasets.py +154 -154
  176. taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
  177. taxonomy_mapping/preview_lila_taxonomy.py +591 -591
  178. taxonomy_mapping/retrieve_sample_image.py +12 -12
  179. taxonomy_mapping/simple_image_download.py +11 -11
  180. taxonomy_mapping/species_lookup.py +10 -10
  181. taxonomy_mapping/taxonomy_csv_checker.py +18 -18
  182. taxonomy_mapping/taxonomy_graph.py +47 -47
  183. taxonomy_mapping/validate_lila_category_mappings.py +83 -76
  184. data_management/cct_json_to_filename_json.py +0 -89
  185. data_management/cct_to_csv.py +0 -140
  186. data_management/databases/remove_corrupted_images_from_db.py +0 -191
  187. detection/detector_training/copy_checkpoints.py +0 -43
  188. megadetector-5.0.8.dist-info/RECORD +0 -205
  189. {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/LICENSE +0 -0
  190. {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/WHEEL +0 -0
taxonomy_mapping/map_new_lila_datasets.py
@@ -1,154 +1,154 @@
-########
-#
-# map_new_lila_datasets.py
-#
-# Given a subset of LILA datasets, find all the categories, and start the taxonomy
-# mapping process.
-#
-########
-
-#%% Constants and imports
-
-import os
-import json
-
-# Created by get_lila_category_list.py
-input_lila_category_list_file = os.path.expanduser('~/lila/lila_categories_list/lila_dataset_to_categories.json')
-
-output_file = os.path.expanduser('~/lila/lila_additions_2023.12.29.csv')
-
-datasets_to_map = [
-    'Trail Camera Images of New Zealand Animals'
-    ]
-
-
-#%% Initialize taxonomic lookup
-
-from taxonomy_mapping.species_lookup import (
-    initialize_taxonomy_lookup,
-    get_preferred_taxonomic_match)
-
-# from taxonomy_mapping.species_lookup import (
-#     get_taxonomic_info, print_taxonomy_matche)
-
-initialize_taxonomy_lookup(force_init=False)
-
-
-#%% Read the list of datasets
-
-with open(input_lila_category_list_file,'r') as f:
-    input_lila_categories = json.load(f)
-
-lila_datasets = set()
-
-for dataset_name in input_lila_categories.keys():
-    # The script that generates this dictionary creates a separate entry for bounding box
-    # metadata files, but those don't represent new dataset names
-    lila_datasets.add(dataset_name.replace('_bbox',''))
-
-for s in datasets_to_map:
-    assert s in lila_datasets
-
-
-#%% Find all categories
-
-category_mappings = []
-
-# dataset_name = datasets_to_map[0]
-for dataset_name in datasets_to_map:
-
-    ds_categories = input_lila_categories[dataset_name]
-    for category in ds_categories:
-        category_name = category['name']
-        assert ':' not in category_name
-        mapping_name = dataset_name + ':' + category_name
-        category_mappings.append(mapping_name)
-
-print('Need to create {} mappings'.format(len(category_mappings)))
-
-
-#%% Match every query against our taxonomies
-
-output_rows = []
-
-taxonomy_preference = 'inat'
-
-allow_non_preferred_matches = True
-
-# mapping_string = category_mappings[1]; print(mapping_string)
-for mapping_string in category_mappings:
-
-    tokens = mapping_string.split(':')
-    assert len(tokens) == 2
-
-    dataset_name = tokens[0]
-    query = tokens[1]
-
-    taxonomic_match = get_preferred_taxonomic_match(query,taxonomy_preference=taxonomy_preference)
-
-    if (taxonomic_match.source == taxonomy_preference) or allow_non_preferred_matches:
-
-        output_row = {
-            'dataset_name': dataset_name,
-            'query': query,
-            'source': taxonomic_match.source,
-            'taxonomy_level': taxonomic_match.taxonomic_level,
-            'scientific_name': taxonomic_match.scientific_name,
-            'common_name': taxonomic_match.common_name,
-            'taxonomy_string': taxonomic_match.taxonomy_string
-        }
-
-    else:
-
-        output_row = {
-            'dataset_name': dataset_name,
-            'query': query,
-            'source': '',
-            'taxonomy_level': '',
-            'scientific_name': '',
-            'common_name': '',
-            'taxonomy_string': ''
-        }
-
-    output_rows.append(output_row)
-
-# ...for each mapping
-
-
-#%% Write output rows
-
-import os
-import pandas as pd
-
-assert not os.path.isfile(output_file), 'Delete the output file before re-generating'
-
-output_df = pd.DataFrame(data=output_rows, columns=[
-    'dataset_name', 'query', 'source', 'taxonomy_level',
-    'scientific_name', 'common_name', 'taxonomy_string'])
-output_df.to_csv(output_file, index=None, header=True)
-
-
-#%% Manual lookup
-
-if False:
-
-    #%%
-
-    # q = 'white-throated monkey'
-    # q = 'cingulata'
-    # q = 'notamacropus'
-    q = 'porzana'
-    taxonomy_preference = 'inat'
-    m = get_preferred_taxonomic_match(q,taxonomy_preference)
-    # print(m.scientific_name); import clipboard; clipboard.copy(m.scientific_name)
-
-    if m is None:
-        print('No match')
-    else:
-        if m.source != taxonomy_preference:
-            print('\n*** non-preferred match ***\n')
-            # raise ValueError('')
-        print(m.source)
-        print(m.taxonomy_string)
-        # print(m.scientific_name); import clipboard; clipboard.copy(m.scientific_name)
-        import clipboard; clipboard.copy(m.taxonomy_string)
+"""
+
+map_new_lila_datasets.py
+
+Given a subset of LILA datasets, find all the categories, and start the taxonomy
+mapping process.
+
+"""
+
+#%% Constants and imports
+
+import os
+import json
+
+# Created by get_lila_category_list.py
+input_lila_category_list_file = os.path.expanduser('~/lila/lila_categories_list/lila_dataset_to_categories.json')
+
+output_file = os.path.expanduser('~/lila/lila_additions_2023.12.29.csv')
+
+datasets_to_map = [
+    'Trail Camera Images of New Zealand Animals'
+    ]
+
+
+#%% Initialize taxonomic lookup
+
+from taxonomy_mapping.species_lookup import (
+    initialize_taxonomy_lookup,
+    get_preferred_taxonomic_match)
+
+# from taxonomy_mapping.species_lookup import (
+#     get_taxonomic_info, print_taxonomy_matche)
+
+initialize_taxonomy_lookup(force_init=False)
+
+
+#%% Read the list of datasets
+
+with open(input_lila_category_list_file,'r') as f:
+    input_lila_categories = json.load(f)
+
+lila_datasets = set()
+
+for dataset_name in input_lila_categories.keys():
+    # The script that generates this dictionary creates a separate entry for bounding box
+    # metadata files, but those don't represent new dataset names
+    lila_datasets.add(dataset_name.replace('_bbox',''))
+
+for s in datasets_to_map:
+    assert s in lila_datasets
+
+
+#%% Find all categories
+
+category_mappings = []
+
+# dataset_name = datasets_to_map[0]
+for dataset_name in datasets_to_map:
+
+    ds_categories = input_lila_categories[dataset_name]
+    for category in ds_categories:
+        category_name = category['name']
+        assert ':' not in category_name
+        mapping_name = dataset_name + ':' + category_name
+        category_mappings.append(mapping_name)
+
+print('Need to create {} mappings'.format(len(category_mappings)))
+
+
+#%% Match every query against our taxonomies
+
+output_rows = []
+
+taxonomy_preference = 'inat'
+
+allow_non_preferred_matches = True
+
+# mapping_string = category_mappings[1]; print(mapping_string)
+for mapping_string in category_mappings:
+
+    tokens = mapping_string.split(':')
+    assert len(tokens) == 2
+
+    dataset_name = tokens[0]
+    query = tokens[1]
+
+    taxonomic_match = get_preferred_taxonomic_match(query,taxonomy_preference=taxonomy_preference)
+
+    if (taxonomic_match.source == taxonomy_preference) or allow_non_preferred_matches:
+
+        output_row = {
+            'dataset_name': dataset_name,
+            'query': query,
+            'source': taxonomic_match.source,
+            'taxonomy_level': taxonomic_match.taxonomic_level,
+            'scientific_name': taxonomic_match.scientific_name,
+            'common_name': taxonomic_match.common_name,
+            'taxonomy_string': taxonomic_match.taxonomy_string
+        }
+
+    else:
+
+        output_row = {
+            'dataset_name': dataset_name,
+            'query': query,
+            'source': '',
+            'taxonomy_level': '',
+            'scientific_name': '',
+            'common_name': '',
+            'taxonomy_string': ''
+        }
+
+    output_rows.append(output_row)
+
+# ...for each mapping
+
+
+#%% Write output rows
+
+import os
+import pandas as pd
+
+assert not os.path.isfile(output_file), 'Delete the output file before re-generating'
+
+output_df = pd.DataFrame(data=output_rows, columns=[
+    'dataset_name', 'query', 'source', 'taxonomy_level',
+    'scientific_name', 'common_name', 'taxonomy_string'])
+output_df.to_csv(output_file, index=None, header=True)
+
+
+#%% Manual lookup
+
+if False:
+
+    #%%
+
+    # q = 'white-throated monkey'
+    # q = 'cingulata'
+    # q = 'notamacropus'
+    q = 'porzana'
+    taxonomy_preference = 'inat'
+    m = get_preferred_taxonomic_match(q,taxonomy_preference)
+    # print(m.scientific_name); import clipboard; clipboard.copy(m.scientific_name)
+
+    if m is None:
+        print('No match')
+    else:
+        if m.source != taxonomy_preference:
+            print('\n*** non-preferred match ***\n')
+            # raise ValueError('')
+        print(m.source)
+        print(m.taxonomy_string)
+        # print(m.scientific_name); import clipboard; clipboard.copy(m.scientific_name)
+        import clipboard; clipboard.copy(m.taxonomy_string)
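
Most of the file-level churn in this release follows the pattern visible above: the old '####'-delimited comment headers become module docstrings. Unlike a comment block, a docstring survives parsing as the module's __doc__, where help(), pydoc, and the Sphinx configuration added in this release (docs/source/conf.py) can find it. A minimal sketch of the difference; the header strings below are illustrative, not taken from the package:

import ast

old_header = (
    '########\n'
    '#\n'
    '# my_script.py\n'
    '#\n'
    '########\n'
    '\n'
    'x = 1\n')

new_header = (
    '"""\n'
    '\n'
    'my_script.py\n'
    '\n'
    '"""\n'
    '\n'
    'x = 1\n')

# The parser discards comments, but records a leading string literal as the
# module docstring
print(ast.get_docstring(ast.parse(old_header)))   # None
print(ast.get_docstring(ast.parse(new_header)))   # 'my_script.py'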
taxonomy_mapping/prepare_lila_taxonomy_release.py
@@ -1,134 +1,142 @@
-########
-#
-# prepare_lila_taxonomy_release.py
-#
-# Given the private intermediate taxonomy mapping (produced by map_new_lila_datasets.py),
-# prepare the public (release) taxonomy mapping file.
-#
-########
-
-#%% Imports and constants
-
-import os
-import json
-import pandas as pd
-
-lila_taxonomy_file = 'c:/git/agentmorrisprivate/lila-taxonomy/lila-taxonomy-mapping.csv'
-release_taxonomy_file = os.path.expanduser('~/lila/lila-taxonomy-mapping_release.csv')
-# import clipboard; clipboard.copy(release_taxonomy_file)
-
-# Created by get_lila_category_list.py... contains counts for each category
-lila_dataset_to_categories_file = os.path.expanduser('~/lila/lila_categories_list/lila_dataset_to_categories.json')
-
-assert os.path.isfile(lila_dataset_to_categories_file)
-assert os.path.isfile(lila_taxonomy_file)
-
-
-#%% Find out which categories are actually used
-
-df = pd.read_csv(lila_taxonomy_file)
-
-with open(lila_dataset_to_categories_file,'r') as f:
-    lila_dataset_to_categories = json.load(f)
-
-used_category_mappings = []
-
-# dataset_name = datasets_to_map[0]
-for dataset_name in lila_dataset_to_categories.keys():
-
-    ds_categories = lila_dataset_to_categories[dataset_name]
-    for category in ds_categories:
-        category_name = category['name'].lower()
-        assert ':' not in category_name
-        mapping_name = dataset_name + ':' + category_name
-        used_category_mappings.append(mapping_name)
-
-df['used'] = False
-
-# i_row = 0; row = df.iloc[i_row]; row
-for i_row,row in df.iterrows():
-    ds_name = row['dataset_name']
-    query = row['query']
-    mapping_name = ds_name + ':' + query
-    if mapping_name in used_category_mappings:
-        df.loc[i_row,'used'] = True
-    else:
-        print('Dropping unused mapping {}'.format(mapping_name))
-
-df = df[df.used]
-df = df.drop('used',axis=1)
-
-
-#%% Generate the final output file
-
-assert not os.path.isfile(release_taxonomy_file)
-
-known_levels = ['stateofmatter',
-                'kingdom',
-                'phylum','subphylum',
-                'superclass','class','subclass','infraclass',
-                'superorder','order','parvorder','suborder','infraorder',
-                'zoosection',
-                'superfamily','family','subfamily','tribe',
-                'genus',
-                'species','subspecies','variety']
-
-levels_to_include = ['kingdom',
-                     'phylum','subphylum',
-                     'superclass','class','subclass','infraclass',
-                     'superorder','order','suborder','infraorder',
-                     'superfamily','family','subfamily','tribe',
-                     'genus',
-                     'species','subspecies','variety']
-
-levels_to_exclude = ['stateofmatter','zoosection','parvorder']
-
-for s in levels_to_exclude:
-    assert s not in levels_to_include
-
-levels_used = set()
-
-# i_row = 0; row = df.iloc[i_row]; row
-for i_row,row in df.iterrows():
-
-    if not isinstance(row['scientific_name'],str):
-        assert not isinstance(row['taxonomy_string'],str)
-        continue
-
-    taxonomic_match = eval(row['taxonomy_string'])
-
-    # match_at_level = taxonomic_match[0]
-    for match_at_level in taxonomic_match:
-        assert len(match_at_level) == 4
-        levels_used.add(match_at_level[1])
-
-levels_used = [s for s in levels_used if isinstance(s,str)]
-
-for s in levels_used:
-    assert s in levels_to_exclude or s in levels_to_include, 'Unrecognized level {}'.format(s)
-
-for s in levels_to_include:
-    assert s in levels_used
-
-for s in levels_to_include:
-    df[s] = ''
-
-# i_row = 0; row = df.iloc[i_row]; row
-for i_row,row in df.iterrows():
-
-    if not isinstance(row['scientific_name'],str):
-        assert not isinstance(row['taxonomy_string'],str)
-        continue
-
-    # E.g.: (43117, 'genus', 'lepus', ['hares and jackrabbits']
-    taxonomic_match = eval(row['taxonomy_string'])
-
-    for match_at_level in taxonomic_match:
-        level = match_at_level[1]
-        if level in levels_to_include:
-            df.loc[i_row,level] = match_at_level[2]
-
-df = df.drop('source',axis=1)
-df.to_csv(release_taxonomy_file,header=True,index=False)
-
-print('Wrote final output to {}'.format(release_taxonomy_file))
+"""
+
+prepare_lila_taxonomy_release.py
+
+Given the private intermediate taxonomy mapping (produced by map_new_lila_datasets.py),
+prepare the public (release) taxonomy mapping file.
+
+"""
+
+#%% Imports and constants
+
+import os
+import json
+import pandas as pd
+
+
+#%% Prevent execution during infrastructural imports
+
+if False:
+
+    #%% Filenames
+
+    lila_taxonomy_file = 'c:/git/agentmorrisprivate/lila-taxonomy/lila-taxonomy-mapping.csv'
+    release_taxonomy_file = os.path.expanduser('~/lila/lila-taxonomy-mapping_release.csv')
+    # import clipboard; clipboard.copy(release_taxonomy_file)
+
+    # Created by get_lila_category_list.py... contains counts for each category
+    lila_dataset_to_categories_file = os.path.expanduser('~/lila/lila_categories_list/lila_dataset_to_categories.json')
+
+    assert os.path.isfile(lila_dataset_to_categories_file)
+    assert os.path.isfile(lila_taxonomy_file)
+
+
+    #%% Find out which categories are actually used
+
+    df = pd.read_csv(lila_taxonomy_file)
+
+    with open(lila_dataset_to_categories_file,'r') as f:
+        lila_dataset_to_categories = json.load(f)
+
+    used_category_mappings = []
+
+    # dataset_name = datasets_to_map[0]
+    for dataset_name in lila_dataset_to_categories.keys():
+
+        ds_categories = lila_dataset_to_categories[dataset_name]
+        for category in ds_categories:
+            category_name = category['name'].lower()
+            assert ':' not in category_name
+            mapping_name = dataset_name + ':' + category_name
+            used_category_mappings.append(mapping_name)
+
+    df['used'] = False
+
+    # i_row = 0; row = df.iloc[i_row]; row
+    for i_row,row in df.iterrows():
+        ds_name = row['dataset_name']
+        query = row['query']
+        mapping_name = ds_name + ':' + query
+        if mapping_name in used_category_mappings:
+            df.loc[i_row,'used'] = True
+        else:
+            print('Dropping unused mapping {}'.format(mapping_name))
+
+    df = df[df.used]
+    df = df.drop('used',axis=1)
+
+
+    #%% Generate the final output file
+
+    assert not os.path.isfile(release_taxonomy_file)
+
+    known_levels = ['stateofmatter', #noqa
+                    'kingdom',
+                    'phylum','subphylum',
+                    'superclass','class','subclass','infraclass',
+                    'superorder','order','parvorder','suborder','infraorder',
+                    'zoosection',
+                    'superfamily','family','subfamily','tribe',
+                    'genus',
+                    'species','subspecies','variety']
+
+    levels_to_include = ['kingdom',
+                         'phylum','subphylum',
+                         'superclass','class','subclass','infraclass',
+                         'superorder','order','suborder','infraorder',
+                         'superfamily','family','subfamily','tribe',
+                         'genus',
+                         'species','subspecies','variety']
+
+    levels_to_exclude = ['stateofmatter','zoosection','parvorder']
+
+    for s in levels_to_exclude:
+        assert s not in levels_to_include
+
+    levels_used = set()
+
+    # i_row = 0; row = df.iloc[i_row]; row
+    for i_row,row in df.iterrows():
+
+        if not isinstance(row['scientific_name'],str):
+            assert not isinstance(row['taxonomy_string'],str)
+            continue
+
+        taxonomic_match = eval(row['taxonomy_string'])
+
+        # match_at_level = taxonomic_match[0]
+        for match_at_level in taxonomic_match:
+            assert len(match_at_level) == 4
+            levels_used.add(match_at_level[1])
+
+    levels_used = [s for s in levels_used if isinstance(s,str)]
+
+    for s in levels_used:
+        assert s in levels_to_exclude or s in levels_to_include, 'Unrecognized level {}'.format(s)
+
+    for s in levels_to_include:
+        assert s in levels_used
+
+    for s in levels_to_include:
+        df[s] = ''
+
+    # i_row = 0; row = df.iloc[i_row]; row
+    for i_row,row in df.iterrows():
+
+        if not isinstance(row['scientific_name'],str):
+            assert not isinstance(row['taxonomy_string'],str)
+            continue
+
+        # E.g.: (43117, 'genus', 'lepus', ['hares and jackrabbits']
+        taxonomic_match = eval(row['taxonomy_string'])
+
+        for match_at_level in taxonomic_match:
+            level = match_at_level[1]
+            if level in levels_to_include:
+                df.loc[i_row,level] = match_at_level[2]
+
+    df = df.drop('source',axis=1)
+    df.to_csv(release_taxonomy_file,header=True,index=False)
+
+    print('Wrote final output to {}'.format(release_taxonomy_file))
+
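
The other substantive change in prepare_lila_taxonomy_release.py is the new '#%% Prevent execution during infrastructural imports' cell: everything below the imports now lives under 'if False:'. These scripts are written as '#%%'-delimited cells meant to be run interactively, one cell at a time, so the guard ensures that merely importing the module (for example, while packaging the wheel or building the Sphinx docs that are new in this release) executes only the imports, never the filename asserts or the CSV processing. A minimal sketch of the pattern, with hypothetical file and path names:

"""

demo_script.py

A cell-based script that should do nothing when imported.

"""

#%% Imports

import os


#%% Prevent execution during infrastructural imports

if False:

    #%% Filenames

    # Hypothetical input path; edit before running this cell
    input_file = os.path.expanduser('~/data/input.csv')
    assert os.path.isfile(input_file)


    #%% Do the work

    print('Processing {}'.format(input_file))

# 'import demo_script' runs only 'import os'; the guarded cells are executed
# by hand (e.g., as a cell or selection in an IDE), which bypasses the
# 'if False:' guard.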