megadetector 5.0.28-py3-none-any.whl → 5.0.29-py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
Potentially problematic release.
This version of megadetector might be problematic.
- megadetector/api/batch_processing/api_core/batch_service/score.py +4 -5
- megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +1 -1
- megadetector/api/batch_processing/api_support/summarize_daily_activity.py +1 -1
- megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
- megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
- megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
- megadetector/api/synchronous/api_core/tests/load_test.py +2 -3
- megadetector/classification/aggregate_classifier_probs.py +3 -3
- megadetector/classification/analyze_failed_images.py +5 -5
- megadetector/classification/cache_batchapi_outputs.py +5 -5
- megadetector/classification/create_classification_dataset.py +11 -12
- megadetector/classification/crop_detections.py +10 -10
- megadetector/classification/csv_to_json.py +8 -8
- megadetector/classification/detect_and_crop.py +13 -15
- megadetector/classification/evaluate_model.py +7 -7
- megadetector/classification/identify_mislabeled_candidates.py +6 -6
- megadetector/classification/json_to_azcopy_list.py +1 -1
- megadetector/classification/json_validator.py +29 -32
- megadetector/classification/map_classification_categories.py +9 -9
- megadetector/classification/merge_classification_detection_output.py +12 -9
- megadetector/classification/prepare_classification_script.py +19 -19
- megadetector/classification/prepare_classification_script_mc.py +23 -23
- megadetector/classification/run_classifier.py +4 -4
- megadetector/classification/save_mislabeled.py +6 -6
- megadetector/classification/train_classifier.py +1 -1
- megadetector/classification/train_classifier_tf.py +9 -9
- megadetector/classification/train_utils.py +10 -10
- megadetector/data_management/annotations/annotation_constants.py +1 -1
- megadetector/data_management/camtrap_dp_to_coco.py +45 -45
- megadetector/data_management/cct_json_utils.py +101 -101
- megadetector/data_management/cct_to_md.py +49 -49
- megadetector/data_management/cct_to_wi.py +33 -33
- megadetector/data_management/coco_to_labelme.py +75 -75
- megadetector/data_management/coco_to_yolo.py +189 -189
- megadetector/data_management/databases/add_width_and_height_to_db.py +3 -2
- megadetector/data_management/databases/combine_coco_camera_traps_files.py +38 -38
- megadetector/data_management/databases/integrity_check_json_db.py +202 -188
- megadetector/data_management/databases/subset_json_db.py +33 -33
- megadetector/data_management/generate_crops_from_cct.py +38 -38
- megadetector/data_management/get_image_sizes.py +54 -49
- megadetector/data_management/labelme_to_coco.py +130 -124
- megadetector/data_management/labelme_to_yolo.py +78 -72
- megadetector/data_management/lila/create_lila_blank_set.py +81 -83
- megadetector/data_management/lila/create_lila_test_set.py +32 -31
- megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
- megadetector/data_management/lila/download_lila_subset.py +21 -24
- megadetector/data_management/lila/generate_lila_per_image_labels.py +91 -91
- megadetector/data_management/lila/get_lila_annotation_counts.py +30 -30
- megadetector/data_management/lila/get_lila_image_counts.py +22 -22
- megadetector/data_management/lila/lila_common.py +70 -70
- megadetector/data_management/lila/test_lila_metadata_urls.py +13 -14
- megadetector/data_management/mewc_to_md.py +339 -340
- megadetector/data_management/ocr_tools.py +258 -252
- megadetector/data_management/read_exif.py +231 -224
- megadetector/data_management/remap_coco_categories.py +26 -26
- megadetector/data_management/remove_exif.py +31 -20
- megadetector/data_management/rename_images.py +187 -187
- megadetector/data_management/resize_coco_dataset.py +41 -41
- megadetector/data_management/speciesnet_to_md.py +41 -41
- megadetector/data_management/wi_download_csv_to_coco.py +55 -55
- megadetector/data_management/yolo_output_to_md_output.py +117 -120
- megadetector/data_management/yolo_to_coco.py +195 -188
- megadetector/detection/change_detection.py +831 -0
- megadetector/detection/process_video.py +340 -337
- megadetector/detection/pytorch_detector.py +304 -262
- megadetector/detection/run_detector.py +177 -164
- megadetector/detection/run_detector_batch.py +364 -363
- megadetector/detection/run_inference_with_yolov5_val.py +328 -325
- megadetector/detection/run_tiled_inference.py +256 -249
- megadetector/detection/tf_detector.py +24 -24
- megadetector/detection/video_utils.py +290 -282
- megadetector/postprocessing/add_max_conf.py +15 -11
- megadetector/postprocessing/categorize_detections_by_size.py +44 -44
- megadetector/postprocessing/classification_postprocessing.py +415 -415
- megadetector/postprocessing/combine_batch_outputs.py +20 -21
- megadetector/postprocessing/compare_batch_results.py +528 -517
- megadetector/postprocessing/convert_output_format.py +97 -97
- megadetector/postprocessing/create_crop_folder.py +219 -146
- megadetector/postprocessing/detector_calibration.py +173 -168
- megadetector/postprocessing/generate_csv_report.py +508 -499
- megadetector/postprocessing/load_api_results.py +23 -20
- megadetector/postprocessing/md_to_coco.py +129 -98
- megadetector/postprocessing/md_to_labelme.py +89 -83
- megadetector/postprocessing/md_to_wi.py +40 -40
- megadetector/postprocessing/merge_detections.py +87 -114
- megadetector/postprocessing/postprocess_batch_results.py +313 -298
- megadetector/postprocessing/remap_detection_categories.py +36 -36
- megadetector/postprocessing/render_detection_confusion_matrix.py +205 -199
- megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
- megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +702 -677
- megadetector/postprocessing/separate_detections_into_folders.py +226 -211
- megadetector/postprocessing/subset_json_detector_output.py +265 -262
- megadetector/postprocessing/top_folders_to_bottom.py +45 -45
- megadetector/postprocessing/validate_batch_results.py +70 -70
- megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +15 -15
- megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +14 -14
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +66 -66
- megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
- megadetector/taxonomy_mapping/simple_image_download.py +8 -8
- megadetector/taxonomy_mapping/species_lookup.py +33 -33
- megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
- megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
- megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
- megadetector/utils/azure_utils.py +22 -22
- megadetector/utils/ct_utils.py +1018 -200
- megadetector/utils/directory_listing.py +21 -77
- megadetector/utils/gpu_test.py +22 -22
- megadetector/utils/md_tests.py +541 -518
- megadetector/utils/path_utils.py +1457 -398
- megadetector/utils/process_utils.py +41 -41
- megadetector/utils/sas_blob_utils.py +53 -49
- megadetector/utils/split_locations_into_train_val.py +61 -61
- megadetector/utils/string_utils.py +147 -26
- megadetector/utils/url_utils.py +463 -173
- megadetector/utils/wi_utils.py +2629 -2526
- megadetector/utils/write_html_image_list.py +137 -137
- megadetector/visualization/plot_utils.py +21 -21
- megadetector/visualization/render_images_with_thumbnails.py +37 -73
- megadetector/visualization/visualization_utils.py +401 -397
- megadetector/visualization/visualize_db.py +197 -190
- megadetector/visualization/visualize_detector_output.py +79 -73
- {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/METADATA +135 -132
- megadetector-5.0.29.dist-info/RECORD +163 -0
- {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/WHEEL +1 -1
- {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/licenses/LICENSE +0 -0
- {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/top_level.txt +0 -0
- megadetector/data_management/importers/add_nacti_sizes.py +0 -52
- megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
- megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
- megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
- megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
- megadetector/data_management/importers/awc_to_json.py +0 -191
- megadetector/data_management/importers/bellevue_to_json.py +0 -272
- megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
- megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
- megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
- megadetector/data_management/importers/cct_field_adjustments.py +0 -58
- megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
- megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
- megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
- megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
- megadetector/data_management/importers/ena24_to_json.py +0 -276
- megadetector/data_management/importers/filenames_to_json.py +0 -386
- megadetector/data_management/importers/helena_to_cct.py +0 -283
- megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
- megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
- megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
- megadetector/data_management/importers/jb_csv_to_json.py +0 -150
- megadetector/data_management/importers/mcgill_to_json.py +0 -250
- megadetector/data_management/importers/missouri_to_json.py +0 -490
- megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
- megadetector/data_management/importers/noaa_seals_2019.py +0 -181
- megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
- megadetector/data_management/importers/pc_to_json.py +0 -365
- megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
- megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
- megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
- megadetector/data_management/importers/rspb_to_json.py +0 -356
- megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
- megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
- megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
- megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
- megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
- megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
- megadetector/data_management/importers/sulross_get_exif.py +0 -65
- megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
- megadetector/data_management/importers/ubc_to_json.py +0 -399
- megadetector/data_management/importers/umn_to_json.py +0 -507
- megadetector/data_management/importers/wellington_to_json.py +0 -263
- megadetector/data_management/importers/wi_to_json.py +0 -442
- megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
- megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
- megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
- megadetector-5.0.28.dist-info/RECORD +0 -209
megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py (+52 -52)

@@ -2,7 +2,7 @@
 
 map_lila_taxonomy_to_wi_taxonomy.py
 
-Loads the LILA category mapping (in which taxonomy information comes from an
+Loads the LILA category mapping (in which taxonomy information comes from an
 iNat taxonomy snapshot) and tries to map each class to the Wildlife Insights taxonomy.
 
 """

@@ -22,9 +22,9 @@ from megadetector.data_management.lila.lila_common import \
 #%% Prevent execution during infrastructural imports
 
 if False:
-
+
     #%%
-
+
     lila_local_base = os.path.expanduser('~/lila')
 
     metadata_dir = os.path.join(lila_local_base, 'metadata')

@@ -65,9 +65,9 @@ if False:
 
     #%% Cache WI taxonomy lookups
 
-    def
+    def _is_empty_wi_item(v):
         if isinstance(v, str):
-            return len(v) == 0
+            return len(v) == 0
         elif v is None:
             return True
         else:

@@ -75,7 +75,7 @@ if False:
             return True
 
 
-    def
+    def _taxonomy_items_equal(a, b):
         if isinstance(a, str) and (not isinstance(b, str)):
             return False
         if isinstance(b, str) and (not isinstance(a, str)):

@@ -121,7 +121,7 @@ if False:
             v = taxon[k]
             if isinstance(v,str):
                 taxon[k] = v.strip()
-
+
         if taxon['commonNameEnglish'] in ignore_taxa:
             continue
 

@@ -152,37 +152,37 @@ if False:
             continue
 
         # Do we have a species name?
-        if not
+        if not _is_empty_wi_item(taxon['species']):
 
             # If 'species' is populated, 'genus' should always be populated; one item currently breaks
             # this rule.
-            assert not
-
+            assert not _is_empty_wi_item(taxon['genus'])
+
             taxon_name = (taxon['genus'].strip() + ' ' +
                           taxon['species'].strip()).strip().lower()
-            assert not
-                not
-                not
+            assert not _is_empty_wi_item(taxon['class']) and \
+                not _is_empty_wi_item(taxon['order']) and \
+                not _is_empty_wi_item(taxon['family'])
 
-        elif not
+        elif not _is_empty_wi_item(taxon['genus']):
 
-            assert not
-                not
-                not
+            assert not _is_empty_wi_item(taxon['class']) and \
+                not _is_empty_wi_item(taxon['order']) and \
+                not _is_empty_wi_item(taxon['family'])
             taxon_name = taxon['genus'].strip().lower()
 
-        elif not
+        elif not _is_empty_wi_item(taxon['family']):
 
-            assert not
-                not
+            assert not _is_empty_wi_item(taxon['class']) and \
+                not _is_empty_wi_item(taxon['order'])
             taxon_name = taxon['family'].strip().lower()
 
-        elif not
+        elif not _is_empty_wi_item(taxon['order']):
 
-            assert not
+            assert not _is_empty_wi_item(taxon['class'])
             taxon_name = taxon['order'].strip().lower()
 
-        elif not
+        elif not _is_empty_wi_item(taxon['class']):
 
             taxon_name = taxon['class'].strip().lower()
 

@@ -204,8 +204,8 @@ if False:
                 level,previous_taxon[level],
                 previous_taxon['taxon_name'],
                 level,taxon[level])
-            assert
-
+            assert _taxonomy_items_equal(previous_taxon[level], taxon[level]), error_string
+
         taxon['taxon_name'] = taxon_name
         if taxon_name == 'homo sapiens':
             human_taxa.append(taxon)

@@ -234,7 +234,7 @@ if False:
         pass
 
     #%% Manual review of redundant taxa
-
+
     s = taxon_names_with_multiple_entries[15]
     taxa = wi_taxon_name_to_taxa[s]
     for t in taxa:

@@ -249,19 +249,19 @@ if False:
     taxon_name_to_preferred_taxon_id = {}
 
     # "helmeted guineafowl" vs "domestic guineafowl"
-    taxon_name_to_preferred_taxon_id['numida meleagris'] = '83133617-8358-4910-82ee-4c23e40ba3dc' # 2005826
+    taxon_name_to_preferred_taxon_id['numida meleagris'] = '83133617-8358-4910-82ee-4c23e40ba3dc' # 2005826
 
     # "domestic turkey" vs. "wild turkey"
-    taxon_name_to_preferred_taxon_id['meleagris gallopavo'] = 'c10547c3-1748-48bf-a451-8066c820f22f' # 2021598
+    taxon_name_to_preferred_taxon_id['meleagris gallopavo'] = 'c10547c3-1748-48bf-a451-8066c820f22f' # 2021598
 
     # multiple sensible human entries
-    taxon_name_to_preferred_taxon_id['homo sapiens'] = '990ae9dd-7a59-4344-afcb-1b7b21368000' # 2002045
+    taxon_name_to_preferred_taxon_id['homo sapiens'] = '990ae9dd-7a59-4344-afcb-1b7b21368000' # 2002045
 
     # "domestic dog" and "dog-on-leash"
-    taxon_name_to_preferred_taxon_id['canis familiaris'] = '3d80f1d6-b1df-4966-9ff4-94053c7a902a' # 2021548
+    taxon_name_to_preferred_taxon_id['canis familiaris'] = '3d80f1d6-b1df-4966-9ff4-94053c7a902a' # 2021548
 
     # "small mammal" vs. "mammal"
-    taxon_name_to_preferred_taxon_id['mammalia'] = 'f2d233e3-80e3-433d-9687-e29ecc7a467a' # 2021108
+    taxon_name_to_preferred_taxon_id['mammalia'] = 'f2d233e3-80e3-433d-9687-e29ecc7a467a' # 2021108
 
     # "Hispaniolan Mango" vs. NaN
     taxon_name_to_preferred_taxon_id['anthracothorax dominicus'] = 'f94e6d97-59cf-4d38-a05a-a75efdd2863b'

@@ -276,19 +276,19 @@ if False:
     taxon_name_to_preferred_taxon_id['stagonopleura bella'] = '7fec8e7e-fd3b-4d7f-99fd-3ade6f3bbaa5' # 2021939
 
     # "yellow wagtail" vs. "yellow crowned-wagtail"
-    taxon_name_to_preferred_taxon_id['motacilla flava'] = 'ac6669bc-9f9e-4473-b609-b9082f9bf50c' # 2016194
+    taxon_name_to_preferred_taxon_id['motacilla flava'] = 'ac6669bc-9f9e-4473-b609-b9082f9bf50c' # 2016194
 
     # "dremomys species" vs. "dremomys genus"
     taxon_name_to_preferred_taxon_id['dremomys'] = '1507d153-af11-46f1-bfb8-77918d035ab3' # 2019370
 
     # "elk" vs. "domestic elk"
-    taxon_name_to_preferred_taxon_id['cervus canadensis'] = 'c5ce946f-8f0d-4379-992b-cc0982381f5e'
+    taxon_name_to_preferred_taxon_id['cervus canadensis'] = 'c5ce946f-8f0d-4379-992b-cc0982381f5e'
 
     # "American bison" vs. "domestic bison"
-    taxon_name_to_preferred_taxon_id['bison bison'] = '539ebd55-081b-429a-9ae6-5a6a0f6999d4' # 2021593
+    taxon_name_to_preferred_taxon_id['bison bison'] = '539ebd55-081b-429a-9ae6-5a6a0f6999d4' # 2021593
 
     # "woodrat or rat or mouse species" vs. "mouse species"
-    taxon_name_to_preferred_taxon_id['muridae'] = 'e7503287-468c-45af-a1bd-a17821bb62f2' # 2021642
+    taxon_name_to_preferred_taxon_id['muridae'] = 'e7503287-468c-45af-a1bd-a17821bb62f2' # 2021642
 
     # both "southern sand frog"
     taxon_name_to_preferred_taxon_id['tomopterna adiastola'] = 'a5dc63cb-41be-4090-84a7-b944b16dcee4' # 2021834

@@ -296,18 +296,18 @@ if False:
     # sericornis species vs. scrubwren species
     taxon_name_to_preferred_taxon_id['sericornis'] = 'ad82c0ac-df48-4028-bf71-d2b2f4bc4129' # 2021776
 
-
+
     # taxon_name = list(taxon_name_to_preferred_taxon_id.keys())[0]
     for taxon_name in taxon_name_to_preferred_taxon_id.keys():
-
+
         candidate_taxa = wi_taxon_name_to_taxa[taxon_name]
-
+
         # If we've gotten this far, we should be choosing from multiple taxa.
         #
         # This will become untrue if any of these are resolved later, at which point we should
         # remove them from taxon_name_to_preferred_id
         assert len(candidate_taxa) > 1, 'Only one taxon available for {}'.format(taxon_name)
-
+
         # Choose the preferred taxa
         selected_taxa = [t for t in candidate_taxa if t[id_column] == \
             taxon_name_to_preferred_taxon_id[taxon_name]]

@@ -365,7 +365,7 @@ if False:
         query = None
 
         lila_dataset_category = lila_taxon['dataset_name'] + ':' + lila_taxon['query']
-
+
         # Go from kingdom --> species, choosing the lowest-level description as the query
         for level in lila_taxonomy_levels:
             if isinstance(lila_taxon[level], str):

@@ -455,37 +455,37 @@ if False:
     #%% Map LILA datasets to WI taxa, and count the number of each taxon available in each dataset
 
     with open(wi_mapping_table_file,'w') as f:
-
+
         f.write('lila_dataset_name,lila_category_name,wi_guid,wi_taxon_name,wi_common,count\n')
-
+
         # dataset_name = list(lila_dataset_to_categories.keys())[0]
        for dataset_name in lila_dataset_to_categories.keys():
-
+
            if '_bbox' in dataset_name:
                continue
-
+
            dataset_categories = lila_dataset_to_categories[dataset_name]
-
+
            # dataset_category = dataset_categories[0]
            for category in dataset_categories:
-
+
                lila_dataset_category = dataset_name + ':' + category['name'].strip().lower()
                if '#' in lila_dataset_category:
                    continue
                assert lila_dataset_category in lila_dataset_category_to_lila_taxon
                assert lila_dataset_category in lila_dataset_category_to_wi_taxon
                assert 'count' in category
-
+
                wi_taxon = lila_dataset_category_to_wi_taxon[lila_dataset_category]
-
-                # Write out the dataset name, category name, WI GUID, WI scientific name, WI common name,
+
+                # Write out the dataset name, category name, WI GUID, WI scientific name, WI common name,
                # and count
                s = f"{dataset_name},{category['name']},{wi_taxon['uniqueIdentifier']},"+\
                    f"{wi_taxon['taxon_name']},{wi_taxon['commonNameEnglish']},{category['count']}\n"
                f.write(s)
-
+
            # ...for each category in this dataset
-
-        # ...for each dataset
+
+        # ...for each dataset
 
     # ...with open()
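In the hunks above, only the new (+) side of the renamed helper definitions is fully visible. For orientation, here is a minimal sketch of what _is_empty_wi_item and _taxonomy_items_equal appear to do, based only on those + lines; the NaN handling in the else branch and everything after the second isinstance check in _taxonomy_items_equal are assumptions for illustration, not code taken from the package.

import math

def _is_empty_wi_item(v):
    # Visible in the new-side lines: empty strings and None count as "empty"
    if isinstance(v, str):
        return len(v) == 0
    elif v is None:
        return True
    else:
        # Assumption: the remaining case is a NaN float from a pandas table;
        # the visible diff only shows this branch ending in "return True"
        assert isinstance(v, float) and math.isnan(v)
        return True

def _taxonomy_items_equal(a, b):
    # Visible in the new-side lines: a string never equals a non-string
    if isinstance(a, str) and (not isinstance(b, str)):
        return False
    if isinstance(b, str) and (not isinstance(a, str)):
        return False
    # Assumption: otherwise compare directly, treating two empty cells as equal
    if _is_empty_wi_item(a) and _is_empty_wi_item(b):
        return True
    return a == b

# Quick sanity checks for the sketch
assert _is_empty_wi_item('') and _is_empty_wi_item(None) and _is_empty_wi_item(float('nan'))
assert not _taxonomy_items_equal('cervidae', None)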
megadetector/taxonomy_mapping/map_new_lila_datasets.py (+15 -15)

@@ -48,7 +48,7 @@ for s in datasets_to_map:
     assert s in lila_datasets
 
 
-#%% Find all categories
+#%% Find all categories
 
 category_mappings = []
 

@@ -75,17 +75,17 @@ allow_non_preferred_matches = True
 
 # mapping_string = category_mappings[1]; print(mapping_string)
 for mapping_string in category_mappings:
-
+
     tokens = mapping_string.split(':')
-    assert len(tokens) == 2
+    assert len(tokens) == 2
 
     dataset_name = tokens[0]
     query = tokens[1]
 
     taxonomic_match = get_preferred_taxonomic_match(query,taxonomy_preference=taxonomy_preference)
-
+
     if (taxonomic_match.source == taxonomy_preference) or allow_non_preferred_matches:
-
+
         output_row = {
             'dataset_name': dataset_name,
             'query': query,

@@ -95,9 +95,9 @@ for mapping_string in category_mappings:
             'common_name': taxonomic_match.common_name,
             'taxonomy_string': taxonomic_match.taxonomy_string
         }
-
+
     else:
-
+
         output_row = {
             'dataset_name': dataset_name,
             'query': query,

@@ -107,10 +107,10 @@ for mapping_string in category_mappings:
             'common_name': '',
             'taxonomy_string': ''
         }
-
+
     output_rows.append(output_row)
-
-# ...for each mapping
+
+# ...for each mapping
 
 
 #%% Write output rows

@@ -133,19 +133,19 @@ output_df.to_csv(output_file, index=None, header=True)
 if False:
 
     #%% You probably want to open the .csv file first
-
+
     from megadetector.utils.path_utils import open_file
     open_file(output_file)
 
-
+
     #%%
-
+
     q = 'dasyurus maculatus'
-
+
     taxonomy_preference = 'inat'
     m = get_preferred_taxonomic_match(q,taxonomy_preference)
     # print(m.scientific_name); import clipboard; clipboard.copy(m.scientific_name)
-
+
     if (m is None) or (len(m.taxonomy_string) == 0):
         print('No match')
     else:
megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py (+14 -14)

@@ -2,7 +2,7 @@
 
 prepare_lila_taxonomy_release.py
 
-Given the private intermediate taxonomy mapping (produced by map_new_lila_datasets.py),
+Given the private intermediate taxonomy mapping (produced by map_new_lila_datasets.py),
 prepare the public (release) taxonomy mapping file.
 
 """

@@ -17,9 +17,9 @@ import pandas as pd
 #%% Prevent execution during infrastructural imports
 
 if False:
-
+
     #%% Filenames
-
+
     lila_taxonomy_file = 'c:/git/agentmorrisprivate/lila-taxonomy/lila-taxonomy-mapping.csv'
     release_taxonomy_file = os.path.expanduser('~/lila/lila-taxonomy-mapping_release.csv')
     # import clipboard; clipboard.copy(release_taxonomy_file)

@@ -42,7 +42,7 @@ if False:
 
     # dataset_name = datasets_to_map[0]
     for dataset_name in lila_dataset_to_categories.keys():
-
+
         ds_categories = lila_dataset_to_categories[dataset_name]
         for category in ds_categories:
             category_name = category['name'].lower()

@@ -93,23 +93,23 @@ if False:
 
     for s in levels_to_exclude:
         assert s not in levels_to_include
-
+
     levels_used = set()
 
     # i_row = 0; row = df.iloc[i_row]; row
     for i_row,row in df.iterrows():
-
+
         if not isinstance(row['scientific_name'],str):
             assert not isinstance(row['taxonomy_string'],str)
             continue
-
+
         taxonomic_match = eval(row['taxonomy_string'])
-
+
         # match_at_level = taxonomic_match[0]
         for match_at_level in taxonomic_match:
             assert len(match_at_level) == 4
             levels_used.add(match_at_level[1])
-
+
     levels_used = [s for s in levels_used if isinstance(s,str)]
 
     for s in levels_used:

@@ -117,20 +117,20 @@ if False:
 
     for s in levels_to_include:
         assert s in levels_used
-
+
     for s in levels_to_include:
         df[s] = ''
-
+
     # i_row = 0; row = df.iloc[i_row]; row
     for i_row,row in df.iterrows():
-
+
         if not isinstance(row['scientific_name'],str):
             assert not isinstance(row['taxonomy_string'],str)
             continue
-
+
         # E.g.: (43117, 'genus', 'lepus', ['hares and jackrabbits']
         taxonomic_match = eval(row['taxonomy_string'])
-
+
         for match_at_level in taxonomic_match:
             level = match_at_level[1]
             if level in levels_to_include:
if level in levels_to_include:
|