PyPI - megadetector - Versions diffs - 5.0.28__py3-none-any.whl → 10.0.0__py3-none-any.whl - Mend

megadetector 5.0.28__py3-none-any.whl → 10.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of megadetector might be problematic. Click here for more details.

Files changed (197) hide show

megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
megadetector/classification/aggregate_classifier_probs.py +3 -3
megadetector/classification/analyze_failed_images.py +5 -5
megadetector/classification/cache_batchapi_outputs.py +5 -5
megadetector/classification/create_classification_dataset.py +11 -12
megadetector/classification/crop_detections.py +10 -10
megadetector/classification/csv_to_json.py +8 -8
megadetector/classification/detect_and_crop.py +13 -15
megadetector/classification/efficientnet/model.py +8 -8
megadetector/classification/efficientnet/utils.py +6 -5
megadetector/classification/evaluate_model.py +7 -7
megadetector/classification/identify_mislabeled_candidates.py +6 -6
megadetector/classification/json_to_azcopy_list.py +1 -1
megadetector/classification/json_validator.py +29 -32
megadetector/classification/map_classification_categories.py +9 -9
megadetector/classification/merge_classification_detection_output.py +12 -9
megadetector/classification/prepare_classification_script.py +19 -19
megadetector/classification/prepare_classification_script_mc.py +26 -26
megadetector/classification/run_classifier.py +4 -4
megadetector/classification/save_mislabeled.py +6 -6
megadetector/classification/train_classifier.py +1 -1
megadetector/classification/train_classifier_tf.py +9 -9
megadetector/classification/train_utils.py +10 -10
megadetector/data_management/annotations/annotation_constants.py +1 -2
megadetector/data_management/camtrap_dp_to_coco.py +79 -46
megadetector/data_management/cct_json_utils.py +103 -103
megadetector/data_management/cct_to_md.py +49 -49
megadetector/data_management/cct_to_wi.py +33 -33
megadetector/data_management/coco_to_labelme.py +75 -75
megadetector/data_management/coco_to_yolo.py +210 -193
megadetector/data_management/databases/add_width_and_height_to_db.py +86 -12
megadetector/data_management/databases/combine_coco_camera_traps_files.py +40 -40
megadetector/data_management/databases/integrity_check_json_db.py +228 -200
megadetector/data_management/databases/subset_json_db.py +33 -33
megadetector/data_management/generate_crops_from_cct.py +88 -39
megadetector/data_management/get_image_sizes.py +54 -49
megadetector/data_management/labelme_to_coco.py +133 -125
megadetector/data_management/labelme_to_yolo.py +159 -73
megadetector/data_management/lila/create_lila_blank_set.py +81 -83
megadetector/data_management/lila/create_lila_test_set.py +32 -31
megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
megadetector/data_management/lila/download_lila_subset.py +21 -24
megadetector/data_management/lila/generate_lila_per_image_labels.py +365 -107
megadetector/data_management/lila/get_lila_annotation_counts.py +35 -33
megadetector/data_management/lila/get_lila_image_counts.py +22 -22
megadetector/data_management/lila/lila_common.py +73 -70
megadetector/data_management/lila/test_lila_metadata_urls.py +28 -19
megadetector/data_management/mewc_to_md.py +344 -340
megadetector/data_management/ocr_tools.py +262 -255
megadetector/data_management/read_exif.py +249 -227
megadetector/data_management/remap_coco_categories.py +90 -28
megadetector/data_management/remove_exif.py +81 -21
megadetector/data_management/rename_images.py +187 -187
megadetector/data_management/resize_coco_dataset.py +588 -120
megadetector/data_management/speciesnet_to_md.py +41 -41
megadetector/data_management/wi_download_csv_to_coco.py +55 -55
megadetector/data_management/yolo_output_to_md_output.py +248 -122
megadetector/data_management/yolo_to_coco.py +333 -191
megadetector/detection/change_detection.py +832 -0
megadetector/detection/process_video.py +340 -337
megadetector/detection/pytorch_detector.py +358 -278
megadetector/detection/run_detector.py +399 -186
megadetector/detection/run_detector_batch.py +404 -377
megadetector/detection/run_inference_with_yolov5_val.py +340 -327
megadetector/detection/run_tiled_inference.py +257 -249
megadetector/detection/tf_detector.py +24 -24
megadetector/detection/video_utils.py +332 -295
megadetector/postprocessing/add_max_conf.py +19 -11
megadetector/postprocessing/categorize_detections_by_size.py +45 -45
megadetector/postprocessing/classification_postprocessing.py +468 -433
megadetector/postprocessing/combine_batch_outputs.py +23 -23
megadetector/postprocessing/compare_batch_results.py +590 -525
megadetector/postprocessing/convert_output_format.py +106 -102
megadetector/postprocessing/create_crop_folder.py +347 -147
megadetector/postprocessing/detector_calibration.py +173 -168
megadetector/postprocessing/generate_csv_report.py +508 -499
megadetector/postprocessing/load_api_results.py +48 -27
megadetector/postprocessing/md_to_coco.py +133 -102
megadetector/postprocessing/md_to_labelme.py +107 -90
megadetector/postprocessing/md_to_wi.py +40 -40
megadetector/postprocessing/merge_detections.py +92 -114
megadetector/postprocessing/postprocess_batch_results.py +319 -301
megadetector/postprocessing/remap_detection_categories.py +91 -38
megadetector/postprocessing/render_detection_confusion_matrix.py +214 -205
megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +704 -679
megadetector/postprocessing/separate_detections_into_folders.py +226 -211
megadetector/postprocessing/subset_json_detector_output.py +265 -262
megadetector/postprocessing/top_folders_to_bottom.py +45 -45
megadetector/postprocessing/validate_batch_results.py +70 -70
megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
megadetector/taxonomy_mapping/map_new_lila_datasets.py +18 -19
megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +54 -33
megadetector/taxonomy_mapping/preview_lila_taxonomy.py +67 -67
megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
megadetector/taxonomy_mapping/simple_image_download.py +8 -8
megadetector/taxonomy_mapping/species_lookup.py +156 -74
megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
megadetector/utils/ct_utils.py +1049 -211
megadetector/utils/directory_listing.py +21 -77
megadetector/utils/gpu_test.py +22 -22
megadetector/utils/md_tests.py +632 -529
megadetector/utils/path_utils.py +1520 -431
megadetector/utils/process_utils.py +41 -41
megadetector/utils/split_locations_into_train_val.py +62 -62
megadetector/utils/string_utils.py +148 -27
megadetector/utils/url_utils.py +489 -176
megadetector/utils/wi_utils.py +2658 -2526
megadetector/utils/write_html_image_list.py +137 -137
megadetector/visualization/plot_utils.py +34 -30
megadetector/visualization/render_images_with_thumbnails.py +39 -74
megadetector/visualization/visualization_utils.py +487 -435
megadetector/visualization/visualize_db.py +232 -198
megadetector/visualization/visualize_detector_output.py +82 -76
{megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/METADATA +5 -2
megadetector-10.0.0.dist-info/RECORD +139 -0
{megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/WHEEL +1 -1
megadetector/api/batch_processing/api_core/__init__.py +0 -0
megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
megadetector/api/batch_processing/api_core/batch_service/score.py +0 -439
megadetector/api/batch_processing/api_core/server.py +0 -294
megadetector/api/batch_processing/api_core/server_api_config.py +0 -97
megadetector/api/batch_processing/api_core/server_app_config.py +0 -55
megadetector/api/batch_processing/api_core/server_batch_job_manager.py +0 -220
megadetector/api/batch_processing/api_core/server_job_status_table.py +0 -149
megadetector/api/batch_processing/api_core/server_orchestration.py +0 -360
megadetector/api/batch_processing/api_core/server_utils.py +0 -88
megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
megadetector/api/batch_processing/api_support/__init__.py +0 -0
megadetector/api/batch_processing/api_support/summarize_daily_activity.py +0 -152
megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
megadetector/api/synchronous/__init__.py +0 -0
megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +0 -151
megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -263
megadetector/api/synchronous/api_core/animal_detection_api/config.py +0 -35
megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
megadetector/api/synchronous/api_core/tests/load_test.py +0 -110
megadetector/data_management/importers/add_nacti_sizes.py +0 -52
megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
megadetector/data_management/importers/awc_to_json.py +0 -191
megadetector/data_management/importers/bellevue_to_json.py +0 -272
megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
megadetector/data_management/importers/cct_field_adjustments.py +0 -58
megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
megadetector/data_management/importers/ena24_to_json.py +0 -276
megadetector/data_management/importers/filenames_to_json.py +0 -386
megadetector/data_management/importers/helena_to_cct.py +0 -283
megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
megadetector/data_management/importers/jb_csv_to_json.py +0 -150
megadetector/data_management/importers/mcgill_to_json.py +0 -250
megadetector/data_management/importers/missouri_to_json.py +0 -490
megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
megadetector/data_management/importers/noaa_seals_2019.py +0 -181
megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
megadetector/data_management/importers/pc_to_json.py +0 -365
megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
megadetector/data_management/importers/rspb_to_json.py +0 -356
megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
megadetector/data_management/importers/sulross_get_exif.py +0 -65
megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
megadetector/data_management/importers/ubc_to_json.py +0 -399
megadetector/data_management/importers/umn_to_json.py +0 -507
megadetector/data_management/importers/wellington_to_json.py +0 -263
megadetector/data_management/importers/wi_to_json.py +0 -442
megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
megadetector/utils/azure_utils.py +0 -178
megadetector/utils/sas_blob_utils.py +0 -509
megadetector-5.0.28.dist-info/RECORD +0 -209
/megadetector/{api/batch_processing/__init__.py → __init__.py} +0 -0
{megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/licenses/LICENSE +0 -0
{megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/top_level.txt +0 -0

megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py CHANGED Viewed

@@ -2,7 +2,7 @@
 map_lila_taxonomy_to_wi_taxonomy.py
-Loads the LILA category mapping (in which taxonomy information comes from an
+Loads the LILA category mapping (in which taxonomy information comes from an
 iNat taxonomy snapshot) and tries to map each class to the Wildlife Insights taxonomy.
 """
@@ -22,9 +22,9 @@ from megadetector.data_management.lila.lila_common import \
 #%% Prevent execution during infrastructural imports
 if False:
     #%%
     lila_local_base = os.path.expanduser('~/lila')
     metadata_dir = os.path.join(lila_local_base, 'metadata')
@@ -65,9 +65,9 @@ if False:
     #%% Cache WI taxonomy lookups
-    def is_empty_wi_item(v):
+    def _is_empty_wi_item(v):
         if isinstance(v, str):
-            return len(v) == 0
+            return len(v) == 0
         elif v is None:
             return True
         else:
@@ -75,7 +75,7 @@ if False:
             return True
-    def taxonomy_items_equal(a, b):
+    def _taxonomy_items_equal(a, b):
         if isinstance(a, str) and (not isinstance(b, str)):
             return False
         if isinstance(b, str) and (not isinstance(a, str)):
@@ -121,7 +121,7 @@ if False:
             v = taxon[k]
             if isinstance(v,str):
                 taxon[k] = v.strip()
         if taxon['commonNameEnglish'] in ignore_taxa:
             continue
@@ -152,37 +152,37 @@ if False:
                 continue
         # Do we have a species name?
-        if not is_empty_wi_item(taxon['species']):
+        if not _is_empty_wi_item(taxon['species']):
             # If 'species' is populated, 'genus' should always be populated; one item currently breaks
             # this rule.
-            assert not is_empty_wi_item(taxon['genus'])
+            assert not _is_empty_wi_item(taxon['genus'])
             taxon_name = (taxon['genus'].strip() + ' ' +
                         taxon['species'].strip()).strip().lower()
-            assert not is_empty_wi_item(taxon['class']) and \
-                not is_empty_wi_item(taxon['order']) and \
-                not is_empty_wi_item(taxon['family'])
+            assert not _is_empty_wi_item(taxon['class']) and \
+                not _is_empty_wi_item(taxon['order']) and \
+                not _is_empty_wi_item(taxon['family'])
-        elif not is_empty_wi_item(taxon['genus']):
+        elif not _is_empty_wi_item(taxon['genus']):
-            assert not is_empty_wi_item(taxon['class']) and \
-                not is_empty_wi_item(taxon['order']) and \
-                not is_empty_wi_item(taxon['family'])
+            assert not _is_empty_wi_item(taxon['class']) and \
+                not _is_empty_wi_item(taxon['order']) and \
+                not _is_empty_wi_item(taxon['family'])
             taxon_name = taxon['genus'].strip().lower()
-        elif not is_empty_wi_item(taxon['family']):
+        elif not _is_empty_wi_item(taxon['family']):
-            assert not is_empty_wi_item(taxon['class']) and \
-                not is_empty_wi_item(taxon['order'])
+            assert not _is_empty_wi_item(taxon['class']) and \
+                not _is_empty_wi_item(taxon['order'])
             taxon_name = taxon['family'].strip().lower()
-        elif not is_empty_wi_item(taxon['order']):
+        elif not _is_empty_wi_item(taxon['order']):
-            assert not is_empty_wi_item(taxon['class'])
+            assert not _is_empty_wi_item(taxon['class'])
             taxon_name = taxon['order'].strip().lower()
-        elif not is_empty_wi_item(taxon['class']):
+        elif not _is_empty_wi_item(taxon['class']):
             taxon_name = taxon['class'].strip().lower()
@@ -204,8 +204,8 @@ if False:
                             level,previous_taxon[level],
                             previous_taxon['taxon_name'],
                             level,taxon[level])
-                        assert taxonomy_items_equal(previous_taxon[level], taxon[level]), error_string
+                        assert _taxonomy_items_equal(previous_taxon[level], taxon[level]), error_string
         taxon['taxon_name'] = taxon_name
         if taxon_name == 'homo sapiens':
             human_taxa.append(taxon)
@@ -234,7 +234,7 @@ if False:
         pass
         #%% Manual review of redundant taxa
         s = taxon_names_with_multiple_entries[15]
         taxa = wi_taxon_name_to_taxa[s]
         for t in taxa:
@@ -249,19 +249,19 @@ if False:
     taxon_name_to_preferred_taxon_id = {}
     # "helmeted guineafowl" vs "domestic guineafowl"
-    taxon_name_to_preferred_taxon_id['numida meleagris'] = '83133617-8358-4910-82ee-4c23e40ba3dc' # 2005826
+    taxon_name_to_preferred_taxon_id['numida meleagris'] = '83133617-8358-4910-82ee-4c23e40ba3dc' # 2005826
     # "domestic turkey" vs. "wild turkey"
-    taxon_name_to_preferred_taxon_id['meleagris gallopavo'] = 'c10547c3-1748-48bf-a451-8066c820f22f' # 2021598
+    taxon_name_to_preferred_taxon_id['meleagris gallopavo'] = 'c10547c3-1748-48bf-a451-8066c820f22f' # 2021598
     # multiple sensible human entries
-    taxon_name_to_preferred_taxon_id['homo sapiens'] = '990ae9dd-7a59-4344-afcb-1b7b21368000' # 2002045
+    taxon_name_to_preferred_taxon_id['homo sapiens'] = '990ae9dd-7a59-4344-afcb-1b7b21368000' # 2002045
     # "domestic dog" and "dog-on-leash"
-    taxon_name_to_preferred_taxon_id['canis familiaris'] = '3d80f1d6-b1df-4966-9ff4-94053c7a902a' # 2021548
+    taxon_name_to_preferred_taxon_id['canis familiaris'] = '3d80f1d6-b1df-4966-9ff4-94053c7a902a' # 2021548
     # "small mammal" vs. "mammal"
-    taxon_name_to_preferred_taxon_id['mammalia'] = 'f2d233e3-80e3-433d-9687-e29ecc7a467a' # 2021108
+    taxon_name_to_preferred_taxon_id['mammalia'] = 'f2d233e3-80e3-433d-9687-e29ecc7a467a' # 2021108
     # "Hispaniolan Mango" vs. NaN
     taxon_name_to_preferred_taxon_id['anthracothorax dominicus'] = 'f94e6d97-59cf-4d38-a05a-a75efdd2863b'
@@ -276,19 +276,19 @@ if False:
     taxon_name_to_preferred_taxon_id['stagonopleura bella'] = '7fec8e7e-fd3b-4d7f-99fd-3ade6f3bbaa5' # 2021939
     # "yellow wagtail" vs. "yellow crowned-wagtail"
-    taxon_name_to_preferred_taxon_id['motacilla flava'] = 'ac6669bc-9f9e-4473-b609-b9082f9bf50c' # 2016194
+    taxon_name_to_preferred_taxon_id['motacilla flava'] = 'ac6669bc-9f9e-4473-b609-b9082f9bf50c' # 2016194
     # "dremomys species" vs. "dremomys genus"
     taxon_name_to_preferred_taxon_id['dremomys'] = '1507d153-af11-46f1-bfb8-77918d035ab3' # 2019370
     # "elk" vs. "domestic elk"
-    taxon_name_to_preferred_taxon_id['cervus canadensis'] = 'c5ce946f-8f0d-4379-992b-cc0982381f5e'
+    taxon_name_to_preferred_taxon_id['cervus canadensis'] = 'c5ce946f-8f0d-4379-992b-cc0982381f5e'
     # "American bison" vs. "domestic bison"
-    taxon_name_to_preferred_taxon_id['bison bison'] = '539ebd55-081b-429a-9ae6-5a6a0f6999d4' # 2021593
+    taxon_name_to_preferred_taxon_id['bison bison'] = '539ebd55-081b-429a-9ae6-5a6a0f6999d4' # 2021593
     # "woodrat or rat or mouse species" vs. "mouse species"
-    taxon_name_to_preferred_taxon_id['muridae'] = 'e7503287-468c-45af-a1bd-a17821bb62f2' # 2021642
+    taxon_name_to_preferred_taxon_id['muridae'] = 'e7503287-468c-45af-a1bd-a17821bb62f2' # 2021642
     # both "southern sand frog"
     taxon_name_to_preferred_taxon_id['tomopterna adiastola'] = 'a5dc63cb-41be-4090-84a7-b944b16dcee4' # 2021834
@@ -296,18 +296,18 @@ if False:
     # sericornis species vs. scrubwren species
     taxon_name_to_preferred_taxon_id['sericornis'] = 'ad82c0ac-df48-4028-bf71-d2b2f4bc4129' # 2021776
     # taxon_name = list(taxon_name_to_preferred_taxon_id.keys())[0]
     for taxon_name in taxon_name_to_preferred_taxon_id.keys():
         candidate_taxa = wi_taxon_name_to_taxa[taxon_name]
         # If we've gotten this far, we should be choosing from multiple taxa.
         #
         # This will become untrue if any of these are resolved later, at which point we should
         # remove them from taxon_name_to_preferred_id
         assert len(candidate_taxa) > 1, 'Only one taxon available for {}'.format(taxon_name)
         # Choose the preferred taxa
         selected_taxa = [t for t in candidate_taxa if t[id_column] == \
                         taxon_name_to_preferred_taxon_id[taxon_name]]
@@ -365,7 +365,7 @@ if False:
         query = None
         lila_dataset_category = lila_taxon['dataset_name'] + ':' + lila_taxon['query']
         # Go from kingdom --> species, choosing the lowest-level description as the query
         for level in lila_taxonomy_levels:
             if isinstance(lila_taxon[level], str):
@@ -455,37 +455,37 @@ if False:
     #%% Map LILA datasets to WI taxa, and count the number of each taxon available in each dataset
     with open(wi_mapping_table_file,'w') as f:
         f.write('lila_dataset_name,lila_category_name,wi_guid,wi_taxon_name,wi_common,count\n')
         # dataset_name = list(lila_dataset_to_categories.keys())[0]
         for dataset_name in lila_dataset_to_categories.keys():
             if '_bbox' in dataset_name:
                 continue
             dataset_categories = lila_dataset_to_categories[dataset_name]
             # dataset_category = dataset_categories[0]
             for category in dataset_categories:
                 lila_dataset_category = dataset_name + ':' + category['name'].strip().lower()
                 if '#' in lila_dataset_category:
                     continue
                 assert lila_dataset_category in lila_dataset_category_to_lila_taxon
                 assert lila_dataset_category in lila_dataset_category_to_wi_taxon
                 assert 'count' in category
                 wi_taxon = lila_dataset_category_to_wi_taxon[lila_dataset_category]
-                # Write out the dataset name, category name, WI GUID, WI scientific name, WI common name,
+                # Write out the dataset name, category name, WI GUID, WI scientific name, WI common name,
                 # and count
                 s = f"{dataset_name},{category['name']},{wi_taxon['uniqueIdentifier']},"+\
                     f"{wi_taxon['taxon_name']},{wi_taxon['commonNameEnglish']},{category['count']}\n"
                 f.write(s)
             # ...for each category in this dataset
-        # ...for each dataset
+        # ...for each dataset
     # ...with open()

megadetector/taxonomy_mapping/map_new_lila_datasets.py CHANGED Viewed

@@ -15,10 +15,10 @@ import json
 # Created by get_lila_category_list.py
 input_lila_category_list_file = os.path.expanduser('~/lila/lila_categories_list/lila_dataset_to_categories.json')
-output_file = os.path.expanduser('~/lila/lila_additions_2025.03.24.csv')
+output_file = os.path.expanduser('~/lila/lila_additions_2025.06.23.csv')
 datasets_to_map = [
-    'UNSW Predators'
+    'Nkhotakota Camera Traps'
     ]
@@ -48,7 +48,7 @@ for s in datasets_to_map:
     assert s in lila_datasets
-#%% Find all categories
+#%% Find all categories
 category_mappings = []
@@ -75,17 +75,17 @@ allow_non_preferred_matches = True
 # mapping_string = category_mappings[1]; print(mapping_string)
 for mapping_string in category_mappings:
     tokens = mapping_string.split(':')
-    assert len(tokens) == 2
+    assert len(tokens) == 2
     dataset_name = tokens[0]
     query = tokens[1]
     taxonomic_match = get_preferred_taxonomic_match(query,taxonomy_preference=taxonomy_preference)
     if (taxonomic_match.source == taxonomy_preference) or allow_non_preferred_matches:
         output_row = {
             'dataset_name': dataset_name,
             'query': query,
@@ -95,9 +95,9 @@ for mapping_string in category_mappings:
             'common_name': taxonomic_match.common_name,
             'taxonomy_string': taxonomic_match.taxonomy_string
         }
     else:
         output_row = {
             'dataset_name': dataset_name,
             'query': query,
@@ -107,10 +107,10 @@ for mapping_string in category_mappings:
             'common_name': '',
             'taxonomy_string': ''
         }
     output_rows.append(output_row)
-# ...for each mapping
+# ...for each mapping
 #%% Write output rows
@@ -133,19 +133,19 @@ output_df.to_csv(output_file, index=None, header=True)
 if False:
     #%% You probably want to open the .csv file first
     from megadetector.utils.path_utils import open_file
     open_file(output_file)
     #%%
-    q = 'dasyurus maculatus'
+    q = 'animalia'
     taxonomy_preference = 'inat'
     m = get_preferred_taxonomic_match(q,taxonomy_preference)
     # print(m.scientific_name); import clipboard; clipboard.copy(m.scientific_name)
     if (m is None) or (len(m.taxonomy_string) == 0):
         print('No match')
     else:
@@ -154,5 +154,4 @@ if False:
             # raise ValueError('')
         print(m.source)
         print(m.taxonomy_string)
-        # print(m.scientific_name); import clipboard; clipboard.copy(m.scientific_name)
         import clipboard; clipboard.copy(m.taxonomy_string)

megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py CHANGED Viewed

@@ -2,7 +2,7 @@
 prepare_lila_taxonomy_release.py
-Given the private intermediate taxonomy mapping (produced by map_new_lila_datasets.py),
+Given the private intermediate taxonomy mapping (produced by map_new_lila_datasets.py),
 prepare the public (release) taxonomy mapping file.
 """
@@ -17,9 +17,9 @@ import pandas as pd
 #%% Prevent execution during infrastructural imports
 if False:
     #%% Filenames
     lila_taxonomy_file = 'c:/git/agentmorrisprivate/lila-taxonomy/lila-taxonomy-mapping.csv'
     release_taxonomy_file = os.path.expanduser('~/lila/lila-taxonomy-mapping_release.csv')
     # import clipboard; clipboard.copy(release_taxonomy_file)
@@ -42,7 +42,7 @@ if False:
     # dataset_name = datasets_to_map[0]
     for dataset_name in lila_dataset_to_categories.keys():
         ds_categories = lila_dataset_to_categories[dataset_name]
         for category in ds_categories:
             category_name = category['name'].lower()
@@ -52,6 +52,8 @@ if False:
     df['used'] = False
+    n_dropped = 0
     # i_row = 0; row = df.iloc[i_row]; row
     for i_row,row in df.iterrows():
         ds_name = row['dataset_name']
@@ -60,8 +62,11 @@ if False:
         if mapping_name in used_category_mappings:
             df.loc[i_row,'used'] = True
         else:
+            n_dropped += 1
             print('Dropping unused mapping {}'.format(mapping_name))
+    print('Dropping {} of {} mappings'.format(n_dropped,len(df)))
     df = df[df.used]
     df = df.drop('used',axis=1)
@@ -71,66 +76,82 @@ if False:
     assert not os.path.isfile(release_taxonomy_file), \
         'File {} exists, delete it manually before proceeding'.format(release_taxonomy_file)
-    known_levels = ['stateofmatter', #noqa
-                        'kingdom',
-                        'phylum','subphylum',
-                        'superclass','class','subclass','infraclass',
-                        'superorder','order','parvorder','suborder','infraorder',
-                        'zoosection',
-                        'superfamily','family','subfamily','tribe',
-                        'genus',
-                        'species','subspecies','variety']
     levels_to_include = ['kingdom',
-                        'phylum','subphylum',
-                        'superclass','class','subclass','infraclass',
-                        'superorder','order','suborder','infraorder',
-                        'superfamily','family','subfamily','tribe',
-                        'genus',
-                        'species','subspecies','variety']
-    levels_to_exclude = ['stateofmatter','zoosection','parvorder','complex','epifamily']
+                         'phylum',
+                         'subphylum',
+                         'superclass',
+                         'class',
+                         'subclass',
+                         'infraclass',
+                         'superorder',
+                         'order',
+                         'suborder',
+                         'infraorder',
+                         'superfamily',
+                         'family',
+                         'subfamily',
+                         'tribe',
+                         'genus',
+                         'subgenus',
+                         'species',
+                         'subspecies',
+                         'variety']
+    levels_to_exclude = ['stateofmatter',
+                         'zoosection',
+                         'parvorder',
+                         'complex',
+                         'epifamily']
+    for x in [levels_to_include,levels_to_exclude]:
+        assert len(x) == len(set(x))
     for s in levels_to_exclude:
         assert s not in levels_to_include
+    known_levels = levels_to_include + levels_to_exclude
     levels_used = set()
     # i_row = 0; row = df.iloc[i_row]; row
     for i_row,row in df.iterrows():
         if not isinstance(row['scientific_name'],str):
             assert not isinstance(row['taxonomy_string'],str)
             continue
+        # This is a list of length-4 tuples that each look like:
+        #
+        # (41789, 'species', 'taxidea taxus', ['american badger'])
         taxonomic_match = eval(row['taxonomy_string'])
         # match_at_level = taxonomic_match[0]
         for match_at_level in taxonomic_match:
             assert len(match_at_level) == 4
+            # E.g. "species"
             levels_used.add(match_at_level[1])
     levels_used = [s for s in levels_used if isinstance(s,str)]
     for s in levels_used:
-        assert s in levels_to_exclude or s in levels_to_include, 'Unrecognized level {}'.format(s)
+        assert s in known_levels, 'Unrecognized level {}'.format(s)
     for s in levels_to_include:
         assert s in levels_used
     for s in levels_to_include:
         df[s] = ''
     # i_row = 0; row = df.iloc[i_row]; row
     for i_row,row in df.iterrows():
         if not isinstance(row['scientific_name'],str):
             assert not isinstance(row['taxonomy_string'],str)
             continue
         # E.g.: (43117, 'genus', 'lepus', ['hares and jackrabbits']
         taxonomic_match = eval(row['taxonomy_string'])
         for match_at_level in taxonomic_match:
             level = match_at_level[1]
             if level in levels_to_include:

megadetector 5.0.28__py3-none-any.whl → 10.0.0__py3-none-any.whl

Potentially problematic release.

megadetector 5.0.28__py3-none-any.whl → 10.0.0__py3-none-any.whl