megadetector-5.0.24-py3-none-any.whl → megadetector-5.0.26-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- megadetector/data_management/cct_json_utils.py +15 -2
- megadetector/data_management/coco_to_yolo.py +53 -31
- megadetector/data_management/databases/combine_coco_camera_traps_files.py +7 -3
- megadetector/data_management/databases/integrity_check_json_db.py +2 -2
- megadetector/data_management/lila/add_locations_to_island_camera_traps.py +73 -69
- megadetector/data_management/lila/add_locations_to_nacti.py +114 -110
- megadetector/data_management/lila/generate_lila_per_image_labels.py +2 -2
- megadetector/data_management/lila/test_lila_metadata_urls.py +21 -10
- megadetector/data_management/remap_coco_categories.py +60 -11
- megadetector/data_management/{wi_to_md.py → speciesnet_to_md.py} +2 -2
- megadetector/data_management/yolo_to_coco.py +45 -15
- megadetector/detection/run_detector.py +1 -0
- megadetector/detection/run_detector_batch.py +5 -4
- megadetector/postprocessing/classification_postprocessing.py +788 -524
- megadetector/postprocessing/compare_batch_results.py +176 -9
- megadetector/postprocessing/create_crop_folder.py +420 -0
- megadetector/postprocessing/load_api_results.py +4 -1
- megadetector/postprocessing/md_to_coco.py +1 -1
- megadetector/postprocessing/postprocess_batch_results.py +158 -44
- megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +3 -8
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +2 -2
- megadetector/postprocessing/separate_detections_into_folders.py +20 -4
- megadetector/postprocessing/subset_json_detector_output.py +180 -15
- megadetector/postprocessing/validate_batch_results.py +13 -5
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +6 -6
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +3 -58
- megadetector/taxonomy_mapping/species_lookup.py +45 -2
- megadetector/utils/ct_utils.py +76 -3
- megadetector/utils/directory_listing.py +4 -4
- megadetector/utils/gpu_test.py +21 -3
- megadetector/utils/md_tests.py +142 -49
- megadetector/utils/path_utils.py +342 -19
- megadetector/utils/wi_utils.py +1286 -212
- megadetector/visualization/visualization_utils.py +16 -4
- megadetector/visualization/visualize_db.py +1 -1
- megadetector/visualization/visualize_detector_output.py +1 -4
- {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info}/METADATA +6 -3
- {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info}/RECORD +41 -40
- {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info}/WHEEL +1 -1
- {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info/licenses}/LICENSE +0 -0
- {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info}/top_level.txt +0 -0
megadetector/utils/wi_utils.py
CHANGED
@@ -27,8 +27,12 @@ from tqdm import tqdm
 
 from megadetector.utils.path_utils import insert_before_extension
 from megadetector.utils.ct_utils import split_list_into_n_chunks
+from megadetector.utils.ct_utils import round_floats_in_nested_dict
+from megadetector.utils.ct_utils import is_list_sorted
 from megadetector.utils.ct_utils import invert_dictionary
 from megadetector.utils.ct_utils import sort_list_of_dicts_by_key
+from megadetector.utils.ct_utils import sort_dictionary_by_value
+from megadetector.utils.ct_utils import sort_dictionary_by_key
 from megadetector.utils.path_utils import find_images
 from megadetector.postprocessing.validate_batch_results import \
     validate_batch_results, ValidateBatchResultsOptions
@@ -58,10 +62,127 @@ def is_valid_prediction_string(s):
     Returns:
         bool: True if this looks more or less like a WI prediction string
     """
-
+
+    # Note to self... don't get tempted to remove spaces here; spaces are used
+    # to indicate subspecies.
     return isinstance(s,str) and (len(s.split(';')) == 7) and (s == s.lower())
 
 
+def is_valid_taxonomy_string(s):
+    """
+    Determine whether [s] is a valid 5-token WI taxonomy string. Taxonomy strings look like:
+
+    'mammalia;rodentia;;;;rodent'
+    'mammalia;chordata;canidae;canis;lupus dingo'
+
+    Args:
+        s (str): the string to be tested for validity
+
+    Returns:
+        bool: True if this looks more or less like a WI taxonomy string
+    """
+    return isinstance(s,str) and (len(s.split(';')) == 5) and (s == s.lower())
+
+
+def clean_taxonomy_string(s):
+    """
+    If [s] is a seven-token prediction string, trim the GUID and common name to produce
+    a "clean" taxonomy string. Else if [s] is a five-token string, return it. Else error.
+
+    Args:
+        s (str): the seven- or five-token taxonomy/prediction string to clean
+
+    Returns:
+        str: the five-token taxonomy string
+    """
+
+    if is_valid_taxonomy_string(s):
+        return s
+    elif is_valid_prediction_string(s):
+        tokens = s.split(';')
+        assert len(tokens) == 7
+        return ';'.join(tokens[1:-1])
+    else:
+        raise ValueError('Invalid taxonomy string')
+
+
+taxonomy_level_names = \
+    ['non-taxonomic','kingdom','phylum','class','order','family','genus','species','subspecies']
+
+
+def taxonomy_level_to_string(k):
+    """
+    Maps taxonomy level indices (1 for kingdom, 2 for phylum, etc.) to strings.
+
+    Args:
+        k (int): taxonomy level index
+
+    Returns:
+        str: taxonomy level string
+    """
+
+    assert k >= 0 and k < len(taxonomy_level_names), \
+        'Illegal taxonomy level index {}'.format(k)
+
+    return taxonomy_level_names[k]
+
+
+def taxonomy_level_string_to_index(s):
+    """
+    Maps strings ('kingdom', 'species', etc.) to level indices.
+
+    Args:
+        s (str): taxonomy level string
+
+    Returns:
+        int: taxonomy level index
+    """
+
+    assert s in taxonomy_level_names, 'Unrecognized taxonomy level string {}'.format(s)
+    return taxonomy_level_names.index(s)
+
+
+def taxonomy_level_index(s):
+    """
+    Returns the taxonomy level up to which [s] is defined (0 for non-taxonomic, 1 for kingdom,
+    2 for phylum, etc.). Empty strings and non-taxonomic strings are treated as level 0. 1 and 2
+    will never be returned; "animal" doesn't look like other taxonomic strings, so here we treat
+    it as non-taxonomic.
+
+    Args:
+        s (str): 5-token or 7-token taxonomy string
+
+    Returns:
+        int: taxonomy level
+    """
+
+    if s in non_taxonomic_prediction_strings or s in non_taxonomic_prediction_short_strings:
+        return 0
+
+    tokens = s.split(';')
+    assert len(tokens) in (5,7)
+
+    if len(tokens) == 7:
+        tokens = tokens[1:-1]
+
+    if len(tokens[0]) == 0:
+        return 0
+    # WI taxonomy strings start at class, so we'll never return 1 (kingdom) or 2 (phylum)
+    elif len(tokens[1]) == 0:
+        return 3
+    elif len(tokens[2]) == 0:
+        return 4
+    elif len(tokens[3]) == 0:
+        return 5
+    elif len(tokens[4]) == 0:
+        return 6
+    # Subspecies are delimited with a space
+    elif ' ' not in tokens[4]:
+        return 7
+    else:
+        return 8
+
+
 def wi_result_to_prediction_string(r):
     """
     Convert the dict [r] - typically loaded from a row in a downloaded .csv file - to
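The new validity and cleaning helpers compose with the level utilities above. A minimal usage sketch (not part of the diff), using the rodent prediction string that this release removes as a module constant:

from megadetector.utils.wi_utils import (
    clean_taxonomy_string, taxonomy_level_index, taxonomy_level_to_string)

# 7-token prediction string: GUID;class;order;family;genus;species;common name
p = '90d950db-2106-4bd9-a4c1-777604c3eada;mammalia;rodentia;;;;rodent'

# Trimming the GUID and common name yields the 5-token taxonomy string
assert clean_taxonomy_string(p) == 'mammalia;rodentia;;;'

# This string is defined down to the order level
assert taxonomy_level_to_string(taxonomy_level_index(p)) == 'order'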
@@ -469,10 +590,18 @@ sample_update_payload = {
 
 blank_prediction_string = 'f1856211-cfb7-4a5b-9158-c0f72fd09ee6;;;;;;blank'
 no_cv_result_prediction_string = 'f2efdae9-efb8-48fb-8a91-eccf79ab4ffb;no cv result;no cv result;no cv result;no cv result;no cv result;no cv result'
-rodent_prediction_string = '90d950db-2106-4bd9-a4c1-777604c3eada;mammalia;rodentia;;;;rodent'
-mammal_prediction_string = 'f2d233e3-80e3-433d-9687-e29ecc7a467a;mammalia;;;;;mammal'
 animal_prediction_string = '1f689929-883d-4dae-958c-3d57ab5b6c16;;;;;;animal'
 human_prediction_string = '990ae9dd-7a59-4344-afcb-1b7b21368000;mammalia;primates;hominidae;homo;sapiens;human'
+vehicle_prediction_string = 'e2895ed5-780b-48f6-8a11-9e27cb594511;;;;;;vehicle'
+
+non_taxonomic_prediction_strings = [blank_prediction_string,
+                                    no_cv_result_prediction_string,
+                                    animal_prediction_string,
+                                    vehicle_prediction_string]
+
+non_taxonomic_prediction_short_strings = [';'.join(s.split(';')[1:-1]) for s in \
+                                          non_taxonomic_prediction_strings]
+
 
 process_cv_response_url = 'https://placeholder'
 
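For reference, the "short" strings built by the comprehension above are just the five middle tokens of each prediction string. A small sketch of the same transformation applied to the blank string:

blank = 'f1856211-cfb7-4a5b-9158-c0f72fd09ee6;;;;;;blank'

# Drop the leading GUID and the trailing common name
short = ';'.join(blank.split(';')[1:-1])
assert short == ';;;;'   # five empty taxonomy tokens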
@@ -870,6 +999,7 @@ def get_kingdom(prediction_string):
         str: the kingdom field from the input string
     """
     tokens = prediction_string.split(';')
+    assert is_valid_prediction_string(prediction_string)
     return tokens[1]
 
 
@@ -885,6 +1015,19 @@ def is_human_classification(prediction_string):
         bool: whether this string corresponds to a human category
     """
     return prediction_string == human_prediction_string or 'homo;sapiens' in prediction_string
+
+
+def is_vehicle_classification(prediction_string):
+    """
+    Determines whether the input string represents a vehicle classification.
+
+    Args:
+        prediction_string (str): a string in the semicolon-delimited prediction string format
+
+    Returns:
+        bool: whether this string corresponds to the vehicle category
+    """
+    return prediction_string == vehicle_prediction_string
 
 
 def is_animal_classification(prediction_string):
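A quick sketch of how the category predicates behave on the module constants; note that vehicle matching is an exact string comparison, unlike the substring test used for humans:

from megadetector.utils.wi_utils import (
    is_human_classification, is_vehicle_classification,
    human_prediction_string, vehicle_prediction_string)

assert is_human_classification(human_prediction_string)
assert is_vehicle_classification(vehicle_prediction_string)

# 'homo;sapiens' anywhere in the string also counts as human
assert is_human_classification('x;mammalia;primates;hominidae;homo;sapiens;someone')
assert not is_vehicle_classification(human_prediction_string)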
@@ -912,30 +1055,150 @@ def is_animal_classification(prediction_string):
     return True
 
 
-def
+def generate_whole_image_detections_for_classifications(classifications_json_file,
+                                                        detections_json_file,
+                                                        ensemble_json_file=None,
+                                                        ignore_blank_classifications=True):
     """
-
-
-
+    Given a set of classification results that were likely run on already-cropped
+    images, generate a file of [fake] detections in which each image is covered
+    in a single whole-image detection.
+
+    Args:
+        classifications_json_file (str): SpeciesNet-formatted file containing classifications
+        detections_json_file (str): SpeciesNet-formatted file to write with detections
+        ensemble_json_file (str, optional): SpeciesNet-formatted file to write with detections
+            and classifications
+        ignore_blank_classifications (bool, optional): use non-top classifications when
+            the top classification is "blank" or "no CV result"
+
+    Returns:
+        dict: the contents of [detections_json_file]
+    """
+
+    with open(classifications_json_file,'r') as f:
+        classification_results = json.load(f)
+    predictions = classification_results['predictions']
+
+    output_predictions = []
+    ensemble_predictions = []
+
+    # prediction = predictions[0]
+    for prediction in predictions:
+
+        output_prediction = {}
+        output_prediction['filepath'] = prediction['filepath']
+        i_score = 0
+        if ignore_blank_classifications:
+            while (prediction['classifications']['classes'][i_score] in \
+                   (blank_prediction_string,no_cv_result_prediction_string)):
+                i_score += 1
+        top_classification = prediction['classifications']['classes'][i_score]
+        top_classification_score = prediction['classifications']['scores'][i_score]
+        if is_animal_classification(top_classification):
+            category_name = 'animal'
+        elif is_human_classification(top_classification):
+            category_name = 'human'
+        else:
+            category_name = 'vehicle'
+
+        if category_name == 'human':
+            md_category_name = 'person'
+        else:
+            md_category_name = category_name
+
+        output_detection = {}
+        output_detection['label'] = category_name
+        output_detection['category'] = md_category_name_to_id[md_category_name]
+        output_detection['conf'] = 1.0
+        output_detection['bbox'] = [0.0, 0.0, 1.0, 1.0]
+        output_prediction['detections'] = [output_detection]
+        output_predictions.append(output_prediction)
+
+        ensemble_prediction = {}
+        ensemble_prediction['filepath'] = prediction['filepath']
+        ensemble_prediction['detections'] = [output_detection]
+        ensemble_prediction['prediction'] = top_classification
+        ensemble_prediction['prediction_score'] = top_classification_score
+        ensemble_prediction['prediction_source'] = 'fake_ensemble_file_utility'
+        ensemble_prediction['classifications'] = prediction['classifications']
+        ensemble_predictions.append(ensemble_prediction)
+
+    # ...for each image
+
+    ## Write output
+
+    if ensemble_json_file is not None:
+
+        ensemble_output_data = {'predictions':ensemble_predictions}
+        with open(ensemble_json_file,'w') as f:
+            json.dump(ensemble_output_data,f,indent=1)
+        _ = validate_predictions_file(ensemble_json_file)
+
+    output_data = {'predictions':output_predictions}
+    with open(detections_json_file,'w') as f:
+        json.dump(output_data,f,indent=1)
+    return validate_predictions_file(detections_json_file)
+
+# ...def generate_whole_image_detections_for_classifications(...)
+
+
+def generate_md_results_from_predictions_json(predictions_json_file,
+                                              md_results_file,
+                                              base_folder=None,
+                                              max_decimals=5,
+                                              convert_human_to_person=True):
+    """
+    Generate an MD-formatted .json file from a predictions.json file, generated by the
+    SpeciesNet ensemble. Typically, MD results files use relative paths, and predictions.json
+    files use absolute paths, so this function optionally removes the leading string
+    [base_folder] from all file names.
+
+    Currently just applies the top classification category to every detection. If the top
+    classification is "blank", writes an empty detection list.
+
+    Uses the classification from the "prediction" field if it's available, otherwise
+    uses the "classifications" field.
 
-
-
+    When using the "prediction" field, records the top class in the "classifications" field to
+    a field in each image called "top_classification_common_name". This is often different
+    from the value of the "prediction" field.
 
-
+    speciesnet_to_md.py is a command-line driver for this function.
 
     Args:
-        predictions_json_file (str): path to a predictions.json file
+        predictions_json_file (str): path to a predictions.json file, or a dict
         md_results_file (str): path to which we should write an MD-formatted .json file
-        base_folder (str, optional): leading string to remove from each path in the
+        base_folder (str, optional): leading string to remove from each path in the
+            predictions.json file
+        max_decimals (int, optional): number of decimal places to which we should round
+            all values
+        convert_human_to_person (bool, optional): WI predictions.json files sometimes use the
+            detection category "human"; MD files usually use "person". If True, switches "human"
+            to "person".
     """
 
     # Read predictions file
-
-
+    if isinstance(predictions_json_file,str):
+        with open(predictions_json_file,'r') as f:
+            predictions = json.load(f)
+    else:
+        assert isinstance(predictions_json_file,dict)
+        predictions = predictions_json_file
+
+    # Round floating-point values (confidence scores, coordinates) to a
+    # reasonable number of decimal places
+    if max_decimals is not None and max_decimals > 0:
+        round_floats_in_nested_dict(predictions)
+
     predictions = predictions['predictions']
     assert isinstance(predictions,list)
 
-
+    # Convert backslashes to forward slashes in both filenames and the base folder string
+    for im in predictions:
+        im['filepath'] = im['filepath'].replace('\\','/')
+    if base_folder is not None:
+        base_folder = base_folder.replace('\\','/')
 
     detection_category_id_to_name = {}
     classification_category_name_to_id = {}
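A minimal sketch of a call to the converter, with hypothetical paths; base_folder, max_decimals, and convert_human_to_person are the options this release adds or extends:

from megadetector.utils.wi_utils import generate_md_results_from_predictions_json

generate_md_results_from_predictions_json(
    predictions_json_file='predictions.json',  # may now also be a pre-loaded dict
    md_results_file='md_results.json',
    base_folder='/data/camera-traps/',  # stripped from each absolute path
    max_decimals=5,
    convert_human_to_person=True)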
@@ -948,6 +1211,8 @@ def generate_md_results_from_predictions_json(predictions_json_file,md_results_f
     # Create the output images list
     images_out = []
 
+    base_folder_replacements = 0
+
     # im_in = predictions[0]
     for im_in in predictions:
 
@@ -957,6 +1222,7 @@ def generate_md_results_from_predictions_json(predictions_json_file,md_results_f
         fn = im_in['filepath']
         if base_folder is not None:
             if fn.startswith(base_folder):
+                base_folder_replacements += 1
                 fn = fn.replace(base_folder,'',1)
 
         im_out['file'] = fn
@@ -990,7 +1256,8 @@ def generate_md_results_from_predictions_json(predictions_json_file,md_results_f
         # ...if detections are present
 
         class_to_assign = None
-        class_confidence = None
+        class_confidence = None
+        top_classification_common_name = None
 
         if 'classifications' in im_in:
 
@@ -1000,8 +1267,15 @@ def generate_md_results_from_predictions_json(predictions_json_file,md_results_f
             class_to_assign = classifications['classes'][0]
             class_confidence = classifications['scores'][0]
 
+            tokens = class_to_assign.split(';')
+            assert len(tokens) == 7
+            top_classification_common_name = tokens[-1]
+            if len(top_classification_common_name) == 0:
+                top_classification_common_name = 'undefined'
+
         if 'prediction' in im_in:
 
+            im_out['top_classification_common_name'] = top_classification_common_name
             class_to_assign = im_in['prediction']
             class_confidence = im_in['prediction_score']
 
@@ -1056,8 +1330,12 @@ def generate_md_results_from_predictions_json(predictions_json_file,md_results_f
 
     # ...for each image
 
-
-
+    if base_folder is not None:
+        if base_folder_replacements == 0:
+            print('Warning: you supplied {} as the base folder, but I made zero replacements'.format(
+                base_folder))
+
+    # Fix the 'unknown' category
     if len(all_unknown_detections) > 0:
 
         max_detection_category_id = max([int(x) for x in detection_category_id_to_name.keys()])
@@ -1075,7 +1353,8 @@ def generate_md_results_from_predictions_json(predictions_json_file,md_results_f
 
     # Prepare friendly classification names
 
-    classification_category_descriptions =
+    classification_category_descriptions = \
+        invert_dictionary(classification_category_name_to_id)
     classification_categories_out = {}
     for category_id in classification_category_descriptions.keys():
         category_name = classification_category_descriptions[category_id].split(';')[-1]
@@ -1088,6 +1367,11 @@ def generate_md_results_from_predictions_json(predictions_json_file,md_results_f
     info['format_version'] = 1.4
     info['detector'] = 'converted_from_predictions_json'
 
+    if convert_human_to_person:
+        for k in detection_categories_out.keys():
+            if detection_categories_out[k] == 'human':
+                detection_categories_out[k] = 'person'
+
     output_dict = {}
     output_dict['info'] = info
     output_dict['detection_categories'] = detection_categories_out
@@ -1105,7 +1389,9 @@ def generate_md_results_from_predictions_json(predictions_json_file,md_results_f
 # ...def generate_md_results_from_predictions_json(...)
 
 
-def generate_predictions_json_from_md_results(md_results_file,
+def generate_predictions_json_from_md_results(md_results_file,
+                                              predictions_json_file,
+                                              base_folder=None):
     """
     Generate a predictions.json file from the MD-formatted .json file [md_results_file]. Typically,
     MD results files use relative paths, and predictions.json files use absolute paths, so
@@ -1166,12 +1452,16 @@ def generate_predictions_json_from_md_results(md_results_file,predictions_json_f
 # ...def generate_predictions_json_from_md_results(...)
 
 
+default_tokens_to_ignore = ['$RECYCLE.BIN']
+
 def generate_instances_json_from_folder(folder,
                                         country=None,
+                                        admin1_region=None,
                                         lat=None,
                                         lon=None,
                                         output_file=None,
-                                        filename_replacements=None
+                                        filename_replacements=None,
+                                        tokens_to_ignore=default_tokens_to_ignore):
     """
     Generate an instances.json record that contains all images in [folder], optionally
     including location information, in a format suitable for run_model.py. Optionally writes
@@ -1186,6 +1476,8 @@ def generate_instances_json_from_folder(folder,
         filename_replacements (dict, optional): str --> str dict indicating filename substrings
             that should be replaced with other strings. Replacement occurs *after* converting
             backslashes to forward slashes.
+        tokens_to_ignore (list, optional): ignore any images with these tokens in their
+            names, typically used to avoid $RECYCLE.BIN. Can be None.
 
     Returns:
         dict: dict with at least the field "instances"
@@ -1195,6 +1487,13 @@ def generate_instances_json_from_folder(folder,
 
     image_files_abs = find_images(folder,recursive=True,return_relative_paths=False)
 
+    if tokens_to_ignore is not None:
+        n_images_before_ignore_tokens = len(image_files_abs)
+        for token in tokens_to_ignore:
+            image_files_abs = [fn for fn in image_files_abs if token not in fn]
+        print('After ignoring {} tokens, kept {} of {} images'.format(
+            len(tokens_to_ignore),len(image_files_abs),n_images_before_ignore_tokens))
+
     instances = []
 
     # image_fn_abs = image_files_abs[0]
@@ -1206,6 +1505,8 @@ def generate_instances_json_from_folder(folder,
                 instance['filepath'] = instance['filepath'].replace(s,filename_replacements[s])
         if country is not None:
             instance['country'] = country
+        if admin1_region is not None:
+            instance['admin1_region'] = admin1_region
        if lat is not None:
             assert lon is not None, 'Latitude provided without longitude'
             instance['latitude'] = lat
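Sketch of building an instances.json with the new admin1_region field and the default $RECYCLE.BIN filter; the folder path is hypothetical:

from megadetector.utils.wi_utils import generate_instances_json_from_folder

instances = generate_instances_json_from_folder(
    folder='/data/camera-traps/survey-2024',
    country='USA',
    admin1_region='WA',
    output_file='instances.json')

print('Generated {} instances'.format(len(instances['instances'])))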
@@ -1226,14 +1527,286 @@ def generate_instances_json_from_folder(folder,
 # ...def generate_instances_json_from_folder(...)
 
 
-
+def split_instances_into_n_batches(instances_json,n_batches,output_files=None):
+    """
+    Given an instances.json file, split it into batches of equal size.
+
+    Args:
+        instances_json (str): input .json file in instances.json format
+        n_batches (int): number of new files to generate
+        output_files (list, optional): output .json files for each
+            batch. If supplied, should have length [n_batches]. If not
+            supplied, filenames will be generated based on [instances_json].
+
+    Returns:
+        list: list of output files that were written; identical to [output_files]
+        if it was supplied as input.
+    """
+
+    with open(instances_json,'r') as f:
+        instances = json.load(f)
+    assert isinstance(instances,dict) and 'instances' in instances
+    instances = instances['instances']
+
+    if output_files is not None:
+        assert len(output_files) == n_batches, \
+            'Expected {} output files, received {}'.format(
+                n_batches,len(output_files))
+    else:
+        output_files = []
+        for i_batch in range(0,n_batches):
+            batch_string = 'batch_{}'.format(str(i_batch).zfill(3))
+            output_files.append(insert_before_extension(instances_json,batch_string))
+
+    batches = split_list_into_n_chunks(instances, n_batches)
+
+    for i_batch,batch in enumerate(batches):
+        batch_dict = {'instances':batch}
+        with open(output_files[i_batch],'w') as f:
+            json.dump(batch_dict,f,indent=1)
+
+    print('Wrote {} batches to file'.format(n_batches))
+
+    return output_files
+
+
+def merge_prediction_json_files(input_prediction_files,output_prediction_file):
+    """
+    Merge all predictions.json files in [input_prediction_files] into a single .json file.
+
+    Args:
+        input_prediction_files (list): list of predictions.json files to merge
+        output_prediction_file (str): output .json file
+    """
+
+    predictions = []
+    image_filenames_processed = set()
+
+    # input_json_fn = input_prediction_files[0]
+    for input_json_fn in tqdm(input_prediction_files):
+
+        assert os.path.isfile(input_json_fn), \
+            'Could not find prediction file {}'.format(input_json_fn)
+        with open(input_json_fn,'r') as f:
+            results_this_file = json.load(f)
+        assert isinstance(results_this_file,dict)
+        predictions_this_file = results_this_file['predictions']
+        for prediction in predictions_this_file:
+            image_fn = prediction['filepath']
+            assert image_fn not in image_filenames_processed
+        predictions.extend(predictions_this_file)
+
+    output_dict = {'predictions':predictions}
+
+    os.makedirs(os.path.dirname(output_prediction_file),exist_ok=True)
+    with open(output_prediction_file,'w') as f:
+        json.dump(output_dict,f,indent=1)
+
+# ...def merge_prediction_json_files(...)
+
+
+def validate_predictions_file(fn,instances=None,verbose=True):
+    """
+    Validate the predictions.json file [fn].
+
+    Args:
+        fn (str): a .json file in predictions.json (SpeciesNet) format
+        instances (str or list, optional): a folder, instances.json file,
+            or dict loaded from an instances.json file. If supplied, this
+            function will verify that [fn] contains the same number of
+            images as [instances].
+        verbose (bool, optional): enable additional debug output
+
+    Returns:
+        dict: the contents of [fn]
+    """
+
+    with open(fn,'r') as f:
+        d = json.load(f)
+    predictions = d['predictions']
+
+    failures = []
+
+    for im in predictions:
+        if 'failures' in im:
+            failures.append(im)
+
+    if verbose:
+        print('Read predictions for {} images, with {} failure(s)'.format(
+            len(d['predictions']),len(failures)))
+
+    if instances is not None:
+
+        if isinstance(instances,str):
+            if os.path.isdir(instances):
+                instances = generate_instances_json_from_folder(folder=instances)
+            elif os.path.isfile(instances):
+                with open(instances,'r') as f:
+                    instances = json.load(f)
+            else:
+                raise ValueError('Could not find instances file/folder {}'.format(
+                    instances))
+        assert isinstance(instances,dict)
+        assert 'instances' in instances
+        instances = instances['instances']
+        if verbose:
+            print('Expected results for {} files'.format(len(instances)))
+        assert len(instances) == len(predictions), \
+            '{} instances expected, {} found'.format(
+                len(instances),len(predictions))
+
+        expected_files = set([instance['filepath'] for instance in instances])
+        found_files = set([prediction['filepath'] for prediction in predictions])
+        assert expected_files == found_files
+
+    # ...if a list of instances was supplied
+
+    return d
+
+# ...def validate_predictions_file(...)
+
+
+def find_geofence_adjustments(ensemble_json_file,use_latin_names=False):
+    """
+    Count the number of instances of each unique change made by the geofence.
+
+    Args:
+        ensemble_json_file (str): SpeciesNet-formatted .json file produced
+            by the full ensemble.
+        use_latin_names (bool, optional): return a mapping using binomial names
+            rather than common names.
+
+    Returns:
+        dict: maps strings that look like "puma,felidae family" to integers,
+        where that entry would indicate the number of times that "puma" was
+        predicted, but mapped to family level by the geofence. Sorted in
+        descending order by count.
+    """
+
+    # Load and validate ensemble results
+    ensemble_results = validate_predictions_file(ensemble_json_file)
+
+    assert isinstance(ensemble_results,dict)
+    predictions = ensemble_results['predictions']
+
+    # Maps comma-separated pairs of common names (or binomial names) to
+    # the number of times that transition (first --> second) happened
+    rollup_pair_to_count = defaultdict(int)
+
+    # prediction = predictions[0]
+    for prediction in tqdm(predictions):
+
+        if 'failures' in prediction and \
+            prediction['failures'] is not None and \
+            len(prediction['failures']) > 0:
+            continue
+
+        assert 'prediction_source' in prediction, \
+            'Prediction present without [prediction_source] field, are you sure this ' + \
+            'is an ensemble output file?'
+
+        if 'geofence' in prediction['prediction_source']:
+
+            classification_taxonomy_string = \
+                prediction['classifications']['classes'][0]
+            prediction_taxonomy_string = prediction['prediction']
+            assert is_valid_prediction_string(classification_taxonomy_string)
+            assert is_valid_prediction_string(prediction_taxonomy_string)
+
+            # Typical examples:
+            # '86f5b978-4f30-40cc-bd08-be9e3fba27a0;mammalia;rodentia;sciuridae;sciurus;carolinensis;eastern gray squirrel'
+            # 'e4d1e892-0e4b-475a-a8ac-b5c3502e0d55;mammalia;rodentia;sciuridae;;;sciuridae family'
+            classification_common_name = classification_taxonomy_string.split(';')[-1]
+            prediction_common_name = prediction_taxonomy_string.split(';')[-1]
+            classification_binomial_name = classification_taxonomy_string.split(';')[-2]
+            prediction_binomial_name = prediction_taxonomy_string.split(';')[-2]
+
+            input_name = classification_binomial_name if use_latin_names else \
+                classification_common_name
+            output_name = prediction_binomial_name if use_latin_names else \
+                prediction_common_name
+
+            rollup_pair = input_name.strip() + ',' + output_name.strip()
+            rollup_pair_to_count[rollup_pair] += 1
+
+        # ...if we made a geofencing change
+
+    # ...for each prediction
+
+    rollup_pair_to_count = sort_dictionary_by_value(rollup_pair_to_count,reverse=True)
+
+    return rollup_pair_to_count
+
+# ...def find_geofence_adjustments(...)
+
+
+def generate_geofence_adjustment_html_summary(rollup_pair_to_count,min_count=10):
+    """
+    Given a dict of geofence rollups, likely generated by find_geofence_adjustments,
+    generate an HTML summary of the changes made by geofencing. The resulting HTML
+    is wrapped in <div>, but not, for example, in <html> or <body>.
+
+    Args:
+        rollup_pair_to_count (dict): dict of changes made by geofencing, see
+            find_geofence_adjustments for details
+        min_count (int, optional): minimum number of changes a pair needs in order
+            to be included in the report.
+    """
+
+    geofence_footer = ''
+
+    # Restrict to the list of taxa that were impacted by geofencing
+    rollup_pair_to_count = \
+        {key: value for key, value in rollup_pair_to_count.items() if value >= min_count}
+
+    # rollup_pair_to_count is sorted in descending order by count
+    assert is_list_sorted(list(rollup_pair_to_count.values()),reverse=True)
+
+    if len(rollup_pair_to_count) > 0:
+
+        geofence_footer = \
+            '<h3>Geofence changes that occurred more than {} times</h3>\n'.format(min_count)
+        geofence_footer += '<div class="contentdiv">\n'
+
+        print('\nRollup changes with count > {}:'.format(min_count))
+        for rollup_pair in rollup_pair_to_count.keys():
+            count = rollup_pair_to_count[rollup_pair]
+            rollup_pair_s = rollup_pair.replace(',',' --> ')
+            print('{}: {}'.format(rollup_pair_s,count))
+            rollup_pair_html = rollup_pair.replace(',',' → ')
+            geofence_footer += '{} ({})<br/>\n'.format(rollup_pair_html,count)
+
+        geofence_footer += '</div>\n'
+
+    return geofence_footer
+
+# ...def generate_geofence_adjustment_html_summary(...)
+
+
+#%% Module-level globals related to taxonomy mapping and geofencing
 
 # This maps a taxonomy string (e.g. mammalia;cetartiodactyla;cervidae;odocoileus;virginianus) to
 # a dict with keys taxon_id, common_name, kingdom, phylum, class, order, family, genus, species
 taxonomy_string_to_taxonomy_info = None
+
+# Maps a binomial name (one, two, or three ws-delimited tokens) to the same dict described above.
 binomial_name_to_taxonomy_info = None
+
+# Maps a common name to the same dict described above
 common_name_to_taxonomy_info = None
 
+# Dict mapping 5-token semicolon-delimited taxonomy strings to geofencing rules
+taxonomy_string_to_geofencing_rules = None
+
+# Maps lower-case country names to upper-case country codes
+country_to_country_code = None
+
+# Maps upper-case country codes to lower-case country names
+country_code_to_country = None
+
+
+#%% Functions related to geofencing and taxonomy mapping
+
 def taxonomy_info_to_taxonomy_string(taxonomy_info):
     """
     Convert a taxonomy record in dict format to a semicolon-delimited string
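These helpers support a simple fan-out/fan-in workflow: split instances into batches (e.g. one per GPU), run the model on each batch, then merge and validate the per-batch predictions. A sketch with hypothetical filenames:

from megadetector.utils.wi_utils import (
    split_instances_into_n_batches, merge_prediction_json_files,
    validate_predictions_file)

batch_files = split_instances_into_n_batches('instances.json', n_batches=4)

# ...run SpeciesNet on each batch, writing one predictions file per batch...

merge_prediction_json_files(
    ['preds/predictions_batch_{}.json'.format(str(i).zfill(3)) for i in range(4)],
    'preds/predictions_merged.json')

# Confirms that the merged file covers exactly the instances we started with
validate_predictions_file('preds/predictions_merged.json', instances='instances.json')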
@@ -1258,12 +1831,16 @@ def initialize_taxonomy_info(taxonomy_file,force_init=False,encoding='cp1252'):
     [common_name_to_taxonomy_info].
 
     Args:
-        taxonomy_file (str): .json file containing
+        taxonomy_file (str): .json file containing mappings from the short taxonomy strings
+            to the longer strings with GUID and common name, see example below.
         force_init (bool, optional): if the output dicts already exist, should we
             re-initialize anyway?
         encoding (str, optional): character encoding to use when opening the .json file
     """
 
+    if encoding is None:
+        encoding = 'cp1252'
+
     global taxonomy_string_to_taxonomy_info
     global binomial_name_to_taxonomy_info
     global common_name_to_taxonomy_info
@@ -1322,26 +1899,174 @@ def initialize_taxonomy_info(taxonomy_file,force_init=False,encoding='cp1252'):
             common_name_to_taxonomy_info[taxon_info['common_name']] = taxon_info
 
         taxonomy_string_to_taxonomy_info[taxonomy_string] = taxon_info
-
+
+        binomial_name = None
+        if len(tokens[4]) > 0 and len(tokens[5]) > 0:
+            # strip(), but don't remove spaces from the species name;
+            # subspecies are separated with a space, e.g. canis;lupus dingo
+            binomial_name = tokens[4].strip() + ' ' + tokens[5].strip()
+        elif len(tokens[4]) > 0:
+            binomial_name = tokens[4].strip()
+        elif len(tokens[3]) > 0:
+            binomial_name = tokens[3].strip()
+        elif len(tokens[2]) > 0:
+            binomial_name = tokens[2].strip()
+        elif len(tokens[1]) > 0:
+            binomial_name = tokens[1].strip()
+        if binomial_name is None:
             # print('Warning: no binomial name for {}'.format(taxonomy_string))
             pass
         else:
-            binomial_name = tokens[4].strip() + ' ' + tokens[5].strip()
             binomial_name_to_taxonomy_info[binomial_name] = taxon_info
+
+    print('Created {} records in taxonomy_string_to_taxonomy_info'.format(len(taxonomy_string_to_taxonomy_info)))
+    print('Created {} records in common_name_to_taxonomy_info'.format(len(common_name_to_taxonomy_info)))
 
 # ...def initialize_taxonomy_info(...)
 
 
-
-
-
-
+def _parse_code_list(codes):
+    """
+    Turn a list of country or state codes in string, delimited string, or list format
+    into a list. Also does basic validity checking.
+    """
+
+    if not isinstance(codes,list):
+
+        assert isinstance(codes,str)
+
+        codes = codes.strip()
+
+        # This is just a single code
+        if ',' not in codes:
+            codes = [codes]
+        else:
+            codes = codes.split(',')
+            codes = [c.strip() for c in codes]
+
+    assert isinstance(codes,list)
+
+    codes = [c.upper().strip() for c in codes]
+
+    for c in codes:
+        assert len(c) in (2,3)
+
+    return codes
+
+
+def _generate_csv_rows_to_block_all_countries_except(
+        species_string,
+        block_except_list):
+    """
+    Generate rows in the format expected by geofence_fixes.csv, representing a list of
+    allow and block rules to block all countries currently allowed for this species
+    except [block_except_list], and add allow rules for these countries.
+    """
+
+    assert is_valid_taxonomy_string(species_string), \
+        '{} is not a valid taxonomy string'.format(species_string)
+
+    global taxonomy_string_to_taxonomy_info
+    global binomial_name_to_taxonomy_info
+    global common_name_to_taxonomy_info
+
+    assert taxonomy_string_to_geofencing_rules is not None, \
+        'Initialize geofencing prior to species lookup'
+    assert taxonomy_string_to_taxonomy_info is not None, \
+        'Initialize taxonomy lookup prior to species lookup'
+
+    geofencing_rules_this_species = \
+        taxonomy_string_to_geofencing_rules[species_string]
+
+    allowed_countries = []
+    if 'allow' in geofencing_rules_this_species:
+        allowed_countries.extend(geofencing_rules_this_species['allow'])
+
+    blocked_countries = []
+    if 'block' in geofencing_rules_this_species:
+        blocked_countries.extend(geofencing_rules_this_species['block'])
+
+    block_except_list = _parse_code_list(block_except_list)
+
+    countries_to_block = []
+    countries_to_allow = []
+
+    # country = allowed_countries[0]
+    for country in allowed_countries:
+        if country not in block_except_list and country not in blocked_countries:
+            countries_to_block.append(country)
+
+    for country in block_except_list:
+        if country in blocked_countries:
+            raise ValueError("I can't allow a country that has already been blocked")
+        if country not in allowed_countries:
+            countries_to_allow.append(country)
+
+    rows = generate_csv_rows_for_species(species_string,
+                                         allow_countries=countries_to_allow,
+                                         block_countries=countries_to_block)
+
+    return rows
+
+# ...def _generate_csv_rows_to_block_all_countries_except(...)
+
+
+def generate_csv_rows_for_species(species_string,
+                                  allow_countries=None,
+                                  block_countries=None,
+                                  allow_states=None,
+                                  block_states=None,
+                                  blockexcept_countries=None):
+    """
+    Generate rows in the format expected by geofence_fixes.csv, representing a list of
+    allow and/or block rules for the specified species and countries/states. Does not check
+    that the rules make sense; e.g. nothing will stop you in this function from both allowing
+    and blocking a country.
+
+    Args:
+        species_string (str): five-token string in semicolon-delimited WI taxonomy format
+        allow_countries (optional, list or str): three-letter country codes, list of
+            country codes, or comma-separated list of country codes to allow
+        block_countries (optional, list or str): three-letter country codes, list of
+            country codes, or comma-separated list of country codes to block
+        allow_states (optional, list or str): two-letter state codes, list of
+            state codes, or comma-separated list of state codes to allow
+        block_states (optional, list or str): two-letter state codes, list of
+            state codes, or comma-separated list of state codes to block
+
+    Returns:
+        list of str: lines ready to be pasted into geofence_fixes.csv
+    """
+
+    assert is_valid_taxonomy_string(species_string), \
+        '{} is not a valid taxonomy string'.format(species_string)
+
+    lines = []
+
+    if allow_countries is not None:
+        allow_countries = _parse_code_list(allow_countries)
+        for country in allow_countries:
+            lines.append(species_string + ',allow,' + country + ',')
+
+    if block_countries is not None:
+        block_countries = _parse_code_list(block_countries)
+        for country in block_countries:
+            lines.append(species_string + ',block,' + country + ',')
+
+    if allow_states is not None:
+        allow_states = _parse_code_list(allow_states)
+        for state in allow_states:
+            lines.append(species_string + ',allow,USA,' + state)
+
+    if block_states is not None:
+        block_states = _parse_code_list(block_states)
+        for state in block_states:
+            lines.append(species_string + ',block,USA,' + state)
+
+    return lines
 
-#
-country_to_country_code = None
+# ...def generate_csv_rows_for_species(...)
 
-# Maps upper-case country codes to lower-case country names
-country_code_to_country = None
 
 def initialize_geofencing(geofencing_file,country_code_file,force_init=False):
     """
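Sketch of generating geofence_fixes.csv rows; the taxonomy string and codes are illustrative:

from megadetector.utils.wi_utils import generate_csv_rows_for_species

rows = generate_csv_rows_for_species(
    'mammalia;carnivora;felidae;puma;concolor',
    allow_countries='USA,CAN',
    block_states=['AK','HI'])

# e.g. 'mammalia;carnivora;felidae;puma;concolor,allow,USA,'
#      'mammalia;carnivora;felidae;puma;concolor,block,USA,AK'
print('\n'.join(rows))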
@@ -1351,10 +2076,13 @@ def initialize_geofencing(geofencing_file,country_code_file,force_init=False):
 
     Args:
         geofencing_file (str): .json file with geofencing rules
-        country_code_file (str): .csv file with country code mappings
+        country_code_file (str): .csv file with country code mappings, in columns
+            called "name" and "alpha-3", e.g. from
+            https://github.com/lukes/ISO-3166-Countries-with-Regional-Codes/blob/master/all/all.csv
         force_init (bool, optional): if the output dicts already exist, should we
             re-initialize anyway?
     """
+
     global taxonomy_string_to_geofencing_rules
     global country_to_country_code
     global country_code_to_country
@@ -1404,29 +2132,63 @@ def initialize_geofencing(geofencing_file,country_code_file,force_init=False):
 
         species_rules = taxonomy_string_to_geofencing_rules[species_string]
 
-
-
-        assert len(species_rules.keys()) == 1
-        rule_type = list(species_rules.keys())[0]
-        assert rule_type in ('allow','block')
-
-        all_country_rules_this_species = species_rules[rule_type]
-        for country_code in all_country_rules_this_species.keys():
-
-            assert country_code in country_code_to_country
+        if len(species_rules.keys()) > 1:
+            print('Warning: taxon {} has both allow and block rules'.format(species_string))
 
-
+        for rule_type in species_rules.keys():
 
-
-
-
-
+            assert rule_type in ('allow','block')
+            all_country_rules_this_species = species_rules[rule_type]
+
+            for country_code in all_country_rules_this_species.keys():
+                assert country_code in country_code_to_country
+                region_rules = all_country_rules_this_species[country_code]
+                # Right now we only have regional rules for the USA; these may be part of
+                # allow or block rules.
+                if len(region_rules) > 0:
+                    assert country_code == 'USA'
 
     # ...for each species
 
 # ...def initialize_geofencing(...)
 
 
+def _species_string_to_canonical_species_string(species):
+    """
+    Convert a string that may be a 5-token species string, a binomial name,
+    or a common name into a 5-token species string, using taxonomic lookup.
+    """
+
+    global taxonomy_string_to_taxonomy_info
+    global binomial_name_to_taxonomy_info
+    global common_name_to_taxonomy_info
+
+    assert taxonomy_string_to_geofencing_rules is not None, \
+        'Initialize geofencing prior to species lookup'
+    assert taxonomy_string_to_taxonomy_info is not None, \
+        'Initialize taxonomy lookup prior to species lookup'
+
+    species = species.lower()
+
+    # Turn "species" into a taxonomy string
+
+    # If this is already a taxonomy string...
+    if len(species.split(';')) == 5:
+        pass
+    # If this is a common name...
+    elif species in common_name_to_taxonomy_info:
+        taxonomy_info = common_name_to_taxonomy_info[species]
+        taxonomy_string = taxonomy_info_to_taxonomy_string(taxonomy_info)
+    # If this is a binomial name...
+    elif (species in binomial_name_to_taxonomy_info):
+        taxonomy_info = binomial_name_to_taxonomy_info[species]
+        taxonomy_string = taxonomy_info_to_taxonomy_string(taxonomy_info)
+    else:
+        raise ValueError('Could not find taxonomic information for {}'.format(species))
+
+    return taxonomy_string
+
+
 def species_allowed_in_country(species,country,state=None,return_status=False):
     """
     Determines whether [species] is allowed in [country], according to
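Both lookup tables have to be initialized before any species lookup; a sketch with hypothetical file paths (a release taxonomy .json and an ISO-3166 country-code .csv):

from megadetector.utils import wi_utils

wi_utils.initialize_taxonomy_info('taxonomy_release.json')
wi_utils.initialize_geofencing('geofence_release.json', 'country-codes.csv')

# Assuming 'puma concolor' exists in the taxonomy, binomial and common names
# now resolve to the same 5-token taxonomy string
s = wi_utils._species_string_to_canonical_species_string('puma concolor')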
@@ -1445,35 +2207,16 @@ def species_allowed_in_country(species,country,state=None,return_status=False):
         False. Returns a more detailed string if return_status is set.
     """
 
+    global taxonomy_string_to_taxonomy_info
+    global binomial_name_to_taxonomy_info
+    global common_name_to_taxonomy_info
+
     assert taxonomy_string_to_geofencing_rules is not None, \
         'Initialize geofencing prior to species lookup'
     assert taxonomy_string_to_taxonomy_info is not None, \
         'Initialize taxonomy lookup prior to species lookup'
 
-
-    # species = 'didelphis marsupialis'
-    # country = 'Guatemala'
-
-    # species = 'common opossum'
-
-    species = species.lower()
-
-    # Turn "species" into a taxonomy string
-
-    # If this is already a taxonomy string...
-    if len(species.split(';')) == 5:
-        pass
-    # If this is a binomial name...
-    elif len(species.split(' ')) == 2 and (species in binomial_name_to_taxonomy_info):
-        taxonomy_info = binomial_name_to_taxonomy_info[species]
-        taxonomy_string = taxonomy_info_to_taxonomy_string(taxonomy_info)
-    # If this is a common name...
-    elif species in common_name_to_taxonomy_info:
-        taxonomy_info = common_name_to_taxonomy_info[species]
-        taxonomy_string = taxonomy_info_to_taxonomy_string(taxonomy_info)
-    else:
-        raise ValueError('Could not find taxonomic information for {}'.format(species))
-
+    taxonomy_string = _species_string_to_canonical_species_string(species)
 
     # Normalize [state]
 
@@ -1504,27 +2247,34 @@ def species_allowed_in_country(species,country,state=None,return_status=False):
     allowed_countries = []
     blocked_countries = []
 
-
-    rule_type
-
+    rule_types_this_species = list(geofencing_rules_this_species.keys())
+    for rule_type in rule_types_this_species:
+        assert rule_type in ('allow','block')
 
-    if
-        allowed_countries = list(geofencing_rules_this_species['allow'])
-    else:
-        assert rule_type == 'block'
+    if 'block' in rule_types_this_species:
         blocked_countries = list(geofencing_rules_this_species['block'])
+    if 'allow' in rule_types_this_species:
+        allowed_countries = list(geofencing_rules_this_species['allow'])
 
     status = None
+
+    # The convention is that block rules win over allow rules
     if country_code in blocked_countries:
-
+        if country_code in allowed_countries:
+            status = 'blocked_over_allow'
+        else:
+            status = 'blocked'
     elif country_code in allowed_countries:
         status = 'allowed'
-
+    elif len(allowed_countries) > 0:
         # The convention is that if allow rules exist, any country not on that list
         # is blocked.
-
-
-
+        status = 'block_not_on_country_allow_list'
+    else:
+        # Only block rules exist for this species, and they don't include this country
+        assert len(blocked_countries) > 0
+        status = 'allow_not_on_block_list'
+
     # Now let's see whether we have to deal with any regional rules
     if state is None:
 
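With the block-over-allow convention above, a status-returning sketch (species and country are illustrative, and assume the initialization calls shown earlier):

from megadetector.utils.wi_utils import species_allowed_in_country

# Boolean convenience form
allowed = species_allowed_in_country('puma concolor', 'guatemala')

# Detailed form: one of 'allowed', 'blocked', 'blocked_over_allow',
# 'block_not_on_country_allow_list', or 'allow_not_on_block_list'
status = species_allowed_in_country('puma concolor', 'guatemala',
                                    return_status=True)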
@@ -1565,71 +2315,478 @@ def species_allowed_in_country(species,country,state=None,return_status=False):
 # ...def species_allowed_in_country(...)
 
 
-
+def restrict_to_taxa_list(taxa_list,
+                          speciesnet_taxonomy_file,
+                          input_file,
+                          output_file,
+                          allow_walk_down=False):
+    """
+    Given a prediction file in MD .json format, likely without having had
+    a geofence applied, apply a custom taxa list.
+
+    Args:
+        taxa_list (str or list): list of latin names, or a text file containing
+            a list of latin names. Optionally may contain a second (comma-delimited)
+            column containing common names, used only for debugging. Latin names
+            must exist in the SpeciesNet taxonomy.
+        speciesnet_taxonomy_file (str): taxonomy filename, in the same format used
+            for model release (with 7-token taxonomy entries)
+        input_file (str): .json file to read, in MD format
+        output_file (str): .json file to write, in MD format
+        allow_walk_down (bool, optional): should we walk down the taxonomy tree
+            when making mappings if a parent has only a single allowable child?
+            For example, if only a single felid species is allowed, should other
+            felid predictions be mapped to that species, as opposed to being mapped
+            to the family?
+    """
 
-
+    ##%% Read target taxa list
 
-
+    if isinstance(taxa_list,str):
+        assert os.path.isfile(taxa_list), \
+            'Could not find taxa list file {}'.format(taxa_list)
+        with open(taxa_list,'r') as f:
+            taxa_list = f.readlines()
+
+    taxa_list = [s.strip().lower() for s in taxa_list]
+    taxa_list = [s for s in taxa_list if len(s) > 0]
+
+    target_latin_to_common = {}
+    for s in taxa_list:
+        if s.strip().startswith('#'):
+            continue
+        tokens = s.split(',')
+        assert len(tokens) <= 2
+        binomial_name = tokens[0]
+        assert len(binomial_name.split(' ')) in (1,2,3), \
+            'Illegal binomial name in species list: {}'.format(binomial_name)
+        if len(tokens) > 1:
+            common_name = tokens[1].strip().lower()
+        else:
+            common_name = None
+        assert binomial_name not in target_latin_to_common
+        target_latin_to_common[binomial_name] = common_name
+
+
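Per the parsing code above, a taxa list has one taxon per line, is lower-cased on read, skips `#` comments, and allows an optional second column for a common name. A hypothetical file that would parse cleanly:

    # custom_taxa.txt (hypothetical example)
    # latin name, optional common name
    puma concolor,puma
    canis lupus,gray wolf
    hippopotamidae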
+    ##%% Read taxonomy file
+
+    with open(speciesnet_taxonomy_file,'r') as f:
+        speciesnet_taxonomy_list = f.readlines()
+    speciesnet_taxonomy_list = [s.strip() for s in \
+        speciesnet_taxonomy_list if len(s.strip()) > 0]
+
+    # Maps the latin name of every taxon to the corresponding full taxon string
+    #
+    # For species, the key is a binomial name
+    speciesnet_latin_name_to_taxon_string = {}
+    speciesnet_common_name_to_taxon_string = {}
+
+    def _insert_taxonomy_string(s):
+
+        tokens = s.split(';')
+        assert len(tokens) == 7
+
+        guid = tokens[0] # noqa
+        class_name = tokens[1]
+        order = tokens[2]
+        family = tokens[3]
+        genus = tokens[4]
+        species = tokens[5]
+        common_name = tokens[6]
+
+        if len(class_name) == 0:
+            assert common_name in ('animal','vehicle','blank')
+            return
+
+        if len(species) > 0:
+            assert all([len(s) > 0 for s in [genus,family,order]])
+            binomial_name = genus + ' ' + species
+            if binomial_name not in speciesnet_latin_name_to_taxon_string:
+                speciesnet_latin_name_to_taxon_string[binomial_name] = s
+        elif len(genus) > 0:
+            assert all([len(s) > 0 for s in [family,order]])
+            if genus not in speciesnet_latin_name_to_taxon_string:
+                speciesnet_latin_name_to_taxon_string[genus] = s
+        elif len(family) > 0:
+            assert len(order) > 0
+            if family not in speciesnet_latin_name_to_taxon_string:
+                speciesnet_latin_name_to_taxon_string[family] = s
+        elif len(order) > 0:
+            if order not in speciesnet_latin_name_to_taxon_string:
+                speciesnet_latin_name_to_taxon_string[order] = s
+        else:
+            if class_name not in speciesnet_latin_name_to_taxon_string:
+                speciesnet_latin_name_to_taxon_string[class_name] = s
+
+        if len(common_name) > 0:
+            if common_name not in speciesnet_common_name_to_taxon_string:
+                speciesnet_common_name_to_taxon_string[common_name] = s
+
+    for s in speciesnet_taxonomy_list:
+
+        _insert_taxonomy_string(s)
+
+
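Each taxonomy entry is a 7-token, semicolon-delimited string: GUID, class, order, family, genus, species, common name. A quick check against a made-up entry shows which key `_insert_taxonomy_string` uses when the species token is non-empty (the binomial name):

    # Hypothetical 7-token entry; only the field layout is meaningful here
    line = 'fake_guid;mammalia;carnivora;felidae;puma;concolor;puma'
    tokens = line.split(';')
    assert len(tokens) == 7
    genus, species = tokens[4], tokens[5]
    binomial_name = genus + ' ' + species
    assert binomial_name == 'puma concolor'  # key for species-level entries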
+    ##%% Make sure all parent taxa are represented in the taxonomy
+
+    # In theory any taxon that appears as the parent of another taxon should
+    # also be in the taxonomy, but this isn't always true, so we fix it here.
+
+    new_taxon_string_to_missing_tokens = defaultdict(list)
+
+    # latin_name = next(iter(speciesnet_latin_name_to_taxon_string.keys()))
+    for latin_name in speciesnet_latin_name_to_taxon_string.keys():
+
+        if 'no cv result' in latin_name:
+            continue
+
+        taxon_string = speciesnet_latin_name_to_taxon_string[latin_name]
+        tokens = taxon_string.split(';')
+
+        # Don't process GUID, species, or common name
+        # i_token = 6
+        for i_token in range(1,len(tokens)-2):
+
+            test_token = tokens[i_token]
+            if len(test_token) == 0:
+                continue
+
+            # Do we need to make up a taxon for this token?
+            if test_token not in speciesnet_latin_name_to_taxon_string:
+
+                new_tokens = [''] * 7
+                new_tokens[0] = 'fake_guid'
+                for i_copy_token in range(1,i_token+1):
+                    new_tokens[i_copy_token] = tokens[i_copy_token]
+                new_tokens[-1] = test_token + ' species'
+                assert new_tokens[-2] == ''
+                new_taxon_string = ';'.join(new_tokens)
+                # assert new_taxon_string not in new_taxon_strings
+                new_taxon_string_to_missing_tokens[new_taxon_string].append(test_token)
+
+        # ...for each token
+
+    # ...for each taxon
+
+    print('Found {} taxa that need to be inserted to make the taxonomy valid:\n'.format(
+        len(new_taxon_string_to_missing_tokens)))
+
+    new_taxon_string_to_missing_tokens = \
+        sort_dictionary_by_key(new_taxon_string_to_missing_tokens)
+    for taxon_string in new_taxon_string_to_missing_tokens:
+        missing_taxa = ','.join(new_taxon_string_to_missing_tokens[taxon_string])
+        print('{} ({})'.format(taxon_string,missing_taxa))
+
+    for new_taxon_string in new_taxon_string_to_missing_tokens:
+        _insert_taxonomy_string(new_taxon_string)
+
+
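For example, if `felidae` appears as the family token of some species row but has no family-level row of its own, the loop above fabricates one by copying the lineage up to that token and synthesizing a common name. A self-contained sketch with illustrative values:

    species_row = 'fake_guid;mammalia;carnivora;felidae;puma;concolor;puma'.split(';')
    new_tokens = [''] * 7
    new_tokens[0] = 'fake_guid'
    new_tokens[1:4] = species_row[1:4]           # copy class/order/family
    new_tokens[-1] = new_tokens[3] + ' species'  # synthesized common name
    assert ';'.join(new_tokens) == 'fake_guid;mammalia;carnivora;felidae;;;felidae species'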
+    ##%% Make sure all species on the allow-list are in the taxonomy
+
+    n_failed_mappings = 0
+
+    for target_taxon_latin_name in target_latin_to_common.keys():
+        if target_taxon_latin_name not in speciesnet_latin_name_to_taxon_string:
+            common_name = target_latin_to_common[target_taxon_latin_name]
+            s = '{} ({}) not in speciesnet taxonomy'.format(
+                target_taxon_latin_name,common_name)
+            if common_name in speciesnet_common_name_to_taxon_string:
+                s += ' (common name maps to {})'.format(
+                    speciesnet_common_name_to_taxon_string[common_name])
+            print(s)
+            n_failed_mappings += 1
+
+    if n_failed_mappings > 0:
+        raise ValueError('Cannot continue with geofence generation')
+
+
+    ##%% For the allow-list, map each parent taxon to a set of allowable child taxa
+
+    # Maps parent names to all allowed child names, or None if this is the
+    # lowest-level allowable taxon on this path
+    allowed_parent_taxon_to_child_taxa = defaultdict(set)
+
+    # latin_name = next(iter(target_latin_to_common.keys()))
+    for latin_name in target_latin_to_common:
+
+        taxon_string = speciesnet_latin_name_to_taxon_string[latin_name]
+        tokens = taxon_string.split(';')
+        assert len(tokens) == 7
+
+        # Remove GUID and common name
+        #
+        # This is now always class/order/family/genus/species
+        tokens = tokens[1:-1]
+
+        child_taxon = None
+
+        # If this is a species
+        if len(tokens[-1]) > 0:
+            binomial_name = tokens[-2] + ' ' + tokens[-1]
+            assert binomial_name == latin_name
+            allowed_parent_taxon_to_child_taxa[binomial_name].add(None)
+            child_taxon = binomial_name
+
+        # The first candidate parent is the genus
+        parent_token_index = len(tokens) - 2
 
-
+        while(parent_token_index >= 0):
+
+            parent_taxon = tokens[parent_token_index]
+            allowed_parent_taxon_to_child_taxa[parent_taxon].add(child_taxon)
+            child_taxon = parent_taxon
+            parent_token_index -= 1
+
+    # ...for each allowed latin name
+
+    allowed_parent_taxon_to_child_taxa = \
+        sort_dictionary_by_key(allowed_parent_taxon_to_child_taxa)
+
+
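For intuition, here is what `allowed_parent_taxon_to_child_taxa` would contain for a hypothetical two-species allow-list; `None` marks the lowest allowable taxon on a path:

    from collections import defaultdict

    allowed = defaultdict(set)
    for lineage in [['mammalia','carnivora','felidae','puma concolor'],
                    ['mammalia','carnivora','canidae','canis lupus']]:
        child = None
        for taxon in reversed(lineage):
            allowed[taxon].add(child)
            child = taxon

    assert allowed['puma concolor'] == {None}            # terminal taxon
    assert allowed['carnivora'] == {'felidae','canidae'} # two allowed children
    assert allowed['mammalia'] == {'carnivora'}          # unique child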
+    ##%% Map all predictions that exist in this dataset...
+
+    # ...to the prediction we should generate.
+
+    with open(input_file,'r') as f:
+        input_data = json.load(f)
+
+    input_category_id_to_common_name = input_data['classification_categories'] #noqa
+    input_category_id_to_taxonomy_string = \
+        input_data['classification_category_descriptions']
+
+    input_category_id_to_output_taxon_string = {}
+
+    # input_category_id = next(iter(input_category_id_to_taxonomy_string.keys()))
+    for input_category_id in input_category_id_to_taxonomy_string.keys():
+
+        input_taxon_string = input_category_id_to_taxonomy_string[input_category_id]
+        input_taxon_tokens = input_taxon_string.split(';')
+        assert len(input_taxon_tokens) == 7
+
+        # Don't mess with blank/no-cv-result/animal/human
+        if (input_taxon_string in non_taxonomic_prediction_strings) or \
+           (input_taxon_string == human_prediction_string):
+            input_category_id_to_output_taxon_string[input_category_id] = \
+                input_taxon_string
+            continue
+
+        # Remove GUID and common name
+        #
+        # This is now always class/order/family/genus/species
+        input_taxon_tokens = input_taxon_tokens[1:-1]
+
+        test_index = len(input_taxon_tokens) - 1
+        target_taxon = None
+
+        # Start at the species level, and see whether each taxon is allowed
+        while((test_index >= 0) and (target_taxon is None)):
+
+            # Species are represented as binomial names
+            if (test_index == (len(input_taxon_tokens) - 1)) and \
+               (len(input_taxon_tokens[-1]) > 0):
+                test_taxon_name = \
+                    input_taxon_tokens[-2] + ' ' + input_taxon_tokens[-1]
+            else:
+                test_taxon_name = input_taxon_tokens[test_index]
+
+            # If we haven't yet found the level at which this taxon is non-empty,
+            # keep going up
+            if len(test_taxon_name) == 0:
+                test_index -= 1
+                continue
+
+            assert test_taxon_name in speciesnet_latin_name_to_taxon_string
+
+            # Is this taxon allowed according to the custom species list?
+            if test_taxon_name in allowed_parent_taxon_to_child_taxa:
+
+                allowed_child_taxa = allowed_parent_taxon_to_child_taxa[test_taxon_name]
+                assert allowed_child_taxa is not None
+
+                # If this is the lowest-level allowable token or there is not a
+                # unique child, don't walk any further, even if walking down
+                # is enabled.
+                if (None in allowed_child_taxa):
+                    assert len(allowed_child_taxa) == 1
+
+                if (None in allowed_child_taxa) or (len(allowed_child_taxa) > 1):
+                    target_taxon = test_taxon_name
+                elif not allow_walk_down:
+                    target_taxon = test_taxon_name
+                else:
+                    # If there's a unique child, walk back *down* the allowable
+                    # taxa until we run out of unique children
+                    while ((next(iter(allowed_child_taxa)) is not None) and \
+                           (len(allowed_child_taxa) == 1)):
+                        candidate_taxon = next(iter(allowed_child_taxa))
+                        assert candidate_taxon in allowed_parent_taxon_to_child_taxa
+                        assert candidate_taxon in speciesnet_latin_name_to_taxon_string
+                        allowed_child_taxa = \
+                            allowed_parent_taxon_to_child_taxa[candidate_taxon]
+                        target_taxon = candidate_taxon
+
+            # ...if this is an allowed taxon
+
+            test_index -= 1
+
+        # ...for each token
+
+        if target_taxon is None:
+            output_taxon_string = animal_prediction_string
+        else:
+            output_taxon_string = speciesnet_latin_name_to_taxon_string[target_taxon]
+        input_category_id_to_output_taxon_string[input_category_id] = output_taxon_string
+
+    # ...for each category
+
+
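The `allow_walk_down` branch above only matters when a predicted parent taxon has exactly one allowable child. A self-contained sketch of that roll-down decision over a one-felid map (hypothetical data, simplified from the loop above):

    # None marks a terminal taxon; 'felidae' has a single allowed child
    allowed = {'felidae': {'puma concolor'}, 'puma concolor': {None}}

    def remap_prediction(taxon, allowed, allow_walk_down):
        children = allowed[taxon]
        if (None in children) or (len(children) > 1) or (not allow_walk_down):
            return taxon
        # Unique child: walk down until the chain stops being unique
        target = taxon
        while len(children) == 1 and next(iter(children)) is not None:
            target = next(iter(children))
            children = allowed[target]
        return target

    assert remap_prediction('felidae', allowed, allow_walk_down=False) == 'felidae'
    assert remap_prediction('felidae', allowed, allow_walk_down=True) == 'puma concolor'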
+    ##%% Build the new tables
 
-
+    input_category_id_to_output_category_id = {}
+    output_taxon_string_to_category_id = {}
+    output_category_id_to_common_name = {}
+
+    for input_category_id in input_category_id_to_output_taxon_string:
+
+        original_common_name = \
+            input_category_id_to_common_name[input_category_id]
+        original_taxon_string = \
+            input_category_id_to_taxonomy_string[input_category_id]
+        output_taxon_string = \
+            input_category_id_to_output_taxon_string[input_category_id]
+
+        output_common_name = output_taxon_string.split(';')[-1]
+
+        # Do we need to create a new output category?
+        if output_taxon_string not in output_taxon_string_to_category_id:
+            output_category_id = str(len(output_taxon_string_to_category_id))
+            output_taxon_string_to_category_id[output_taxon_string] = \
+                output_category_id
+            output_category_id_to_common_name[output_category_id] = \
+                output_common_name
+        else:
+            output_category_id = \
+                output_taxon_string_to_category_id[output_taxon_string]
+
+        input_category_id_to_output_category_id[input_category_id] = \
+            output_category_id
+
+        if False:
+            print('Mapping {} ({}) to:\n{} ({})\n'.format(
+                original_common_name,original_taxon_string,
+                output_common_name,output_taxon_string))
+        if False:
+            print('Mapping {} to {}'.format(
+                original_common_name,output_common_name))
 
-
+    # ...for each category
 
-
-
-
-
-
-
+
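The effect of this table rebuild is that distinct input categories mapping to the same output taxon collapse into one output category, with IDs assigned in order of first appearance. A toy illustration (taxon strings abbreviated to single names):

    input_to_output_taxon = {'0': 'felidae', '1': 'felidae', '2': 'canis lupus'}
    output_taxon_to_id = {}
    input_to_output_id = {}
    for cid, taxon in input_to_output_taxon.items():
        if taxon not in output_taxon_to_id:
            output_taxon_to_id[taxon] = str(len(output_taxon_to_id))
        input_to_output_id[cid] = output_taxon_to_id[taxon]

    assert input_to_output_id == {'0': '0', '1': '0', '2': '1'}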
+    ##%% Remap all category labels
+
+    assert len(set(output_taxon_string_to_category_id.keys())) == \
+        len(set(output_taxon_string_to_category_id.values()))
+
+    output_category_id_to_taxon_string = \
+        invert_dictionary(output_taxon_string_to_category_id)
+
+    with open(input_file,'r') as f:
+        output_data = json.load(f)
+
+    for im in tqdm(output_data['images']):
+        if 'detections' in im and im['detections'] is not None:
+            for det in im['detections']:
+                if 'classifications' in det:
+                    for classification in det['classifications']:
+                        classification[0] = \
+                            input_category_id_to_output_category_id[classification[0]]
+
+    output_data['classification_categories'] = output_category_id_to_common_name
+    output_data['classification_category_descriptions'] = \
+        output_category_id_to_taxon_string
+
+
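In MD-format results each classification is a `[category_id, confidence]` pair, which is why the loop above rewrites only element 0. A minimal illustration with made-up IDs and scores:

    detection = {'category': '1', 'conf': 0.9,
                 'classifications': [['4', 0.83], ['7', 0.11]]}
    input_category_id_to_output_category_id = {'4': '0', '7': '0'}
    for classification in detection['classifications']:
        classification[0] = input_category_id_to_output_category_id[classification[0]]

    assert detection['classifications'] == [['0', 0.83], ['0', 0.11]]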
+    ##%% Write output
+
+    with open(output_file,'w') as f:
+        json.dump(output_data,f,indent=1)
+
+
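Putting it together, a hypothetical invocation of the new function (all file paths are placeholders, not files shipped with the package):

    from megadetector.utils.wi_utils import restrict_to_taxa_list

    restrict_to_taxa_list(taxa_list='custom_taxa.txt',
                          speciesnet_taxonomy_file='taxonomy_release.txt',
                          input_file='ensemble-output-md-format.json',
                          output_file='ensemble-output-md-format.restricted.json',
                          allow_walk_down=False)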
+#%% Interactive driver(s)
 
-
+if False:
+
+    pass
 
+    #%% Shared cell to initialize geofencing and taxonomy information
+
+    from megadetector.utils.wi_utils import species_allowed_in_country # noqa
+    from megadetector.utils.wi_utils import initialize_geofencing, initialize_taxonomy_info # noqa
+    from megadetector.utils.wi_utils import _species_string_to_canonical_species_string # noqa
+    from megadetector.utils.wi_utils import generate_csv_rows_for_species # noqa
+    from megadetector.utils.wi_utils import _generate_csv_rows_to_block_all_countries_except # noqa
 
-
+    from megadetector.utils.wi_utils import taxonomy_string_to_geofencing_rules # noqa
+    from megadetector.utils.wi_utils import taxonomy_string_to_taxonomy_info # noqa
+    from megadetector.utils.wi_utils import common_name_to_taxonomy_info # noqa
+    from megadetector.utils.wi_utils import binomial_name_to_taxonomy_info # noqa
 
-
-
-
-
-
+    model_base = os.path.expanduser('~/models/speciesnet')
+    geofencing_file = os.path.join(model_base,'crop','geofence_release.2025.02.27.0702.json')
+    country_code_file = os.path.join(model_base,'country-codes.csv')
+    # encoding = 'cp1252'; taxonomy_file = r'g:\temp\taxonomy_mapping-' + encoding + '.json'
+    encoding = None; taxonomy_file = os.path.join(model_base,'taxonomy_mapping.json')
 
-
-
-
-    generate_predictions_json_from_md_results(md_results_file,predictions_json_file,base_folder=
-                                              '/mnt/g/temp/water-hole')
+    initialize_geofencing(geofencing_file, country_code_file, force_init=True)
+    initialize_taxonomy_info(taxonomy_file, force_init=True, encoding=encoding)
+
 
+    #%% Test driver for geofence_fixes.csv function
 
-
+    block_except_list = 'AUS, PNG, THA, IDN, MYS'
+    species = 'dingo'
+    species_string = _species_string_to_canonical_species_string(species)
+    rows = _generate_csv_rows_to_block_all_countries_except(species_string,block_except_list)
 
-
-
-    encoding = 'cp1252'; taxonomy_file = r'g:\temp\taxonomy_mapping-' + encoding + '.json'
+    # import clipboard; clipboard.copy('\n'.join(rows))
+    print(rows)
 
-    initialize_taxonomy_info(taxonomy_file, force_init=True, encoding=encoding)
-    initialize_geofencing(geofencing_file, country_code_file, force_init=True)
 
-
-    print(binomial_name_to_taxonomy_info[species])
-    country = 'Guatemala'
-    assert species_allowed_in_country(species, country)
+    #%%
 
-
-
-
-    assert
+    taxon_name = 'hippopotamus amphibius'
+    taxonomy_info = binomial_name_to_taxonomy_info[taxon_name]
+    taxonomy_string_short = taxonomy_info_to_taxonomy_string(taxonomy_info)
+    assert len(taxonomy_string_short.split(';')) == 5
 
+    generate_csv_rows_for_species(species_string=taxonomy_string_short,
+                                  allow_countries=['COL'],
+                                  block_countries=None,
+                                  allow_states=None,
+                                  block_states=None,
+                                  blockexcept_countries=None)
+
+    # _generate_csv_rows_to_block_all_countries_except(species_string,'AUS')
 
-    #%% Test several species
 
-
-
-
+    #%% Test the effects of geofence changes
+
+    species = 'canis lupus dingo'
+    country = 'guatemala'
+    species_allowed_in_country(species,country,state=None,return_status=False)
+
 
-
-
+    #%% Geofencing lookups
+
+    # This can be a latin or common name
+    species = 'hippopotamidae'
+    # print(common_name_to_taxonomy_info[species])
+
+    # This can be a name or country code
+    country = 'USA'
+    print(species_allowed_in_country(species, country))
+
+
+    #%% Bulk geofence lookups
 
     if True:
 
@@ -1709,86 +2866,3 @@ if False:
     if state is not None:
         state_string = ' ({})'.format(state)
     print('{} ({}) for {}{}: {}'.format(taxonomy_info['common_name'],species,country,state_string,allowed))
-
-
-    #%% Test conversion from predictons.json to MD format
-
-    import os # noqa
-    from megadetector.utils.wi_utils import generate_md_results_from_predictions_json # noqa
-
-    # detector_source = 'speciesnet'
-    detector_source = 'md'
-
-    if False:
-        image_folder = r'g:\temp\md-test-images'
-        base_folder = '/home/dmorris/tmp/md-test-images/'
-        if detector_source == 'speciesnet':
-            predictions_json_file = r"\\wsl$\Ubuntu\home\dmorris\tmp\speciesnet-tests\ensemble-output.json"
-            md_results_file = r"\\wsl$\Ubuntu\home\dmorris\tmp\speciesnet-tests\ensemble-output-md-format.json"
-        else:
-            assert detector_source == 'md'
-            predictions_json_file = r"\\wsl$\Ubuntu\home\dmorris\tmp\speciesnet-tests\ensemble-output-from-md-results.json"
-            md_results_file = r"\\wsl$\Ubuntu\home\dmorris\tmp\speciesnet-tests\ensemble-output-md-format-from-md-results.json"
-    else:
-        image_folder = r'g:\temp\water-hole'
-        base_folder = '/mnt/g/temp/water-hole/'
-        if detector_source == 'speciesnet':
-            predictions_json_file = r'g:\temp\water-hole\ensemble-output.json'
-            md_results_file = r'g:\temp\water-hole\ensemble-output.md_format.json'
-        else:
-            assert detector_source == 'md'
-            predictions_json_file = r'g:\temp\water-hole\ensemble-output-md.json'
-            md_results_file = r'g:\temp\water-hole\ensemble-output-md.md_format.json'
-
-    generate_md_results_from_predictions_json(predictions_json_file=predictions_json_file,
-                                              md_results_file=md_results_file,
-                                              base_folder=base_folder)
-
-    # from megadetector.utils.path_utils import open_file; open_file(md_results_file)
-
-    assert os.path.isdir(image_folder)
-
-
-    #%% Preview
-
-    from megadetector.postprocessing.postprocess_batch_results import \
-        PostProcessingOptions, process_batch_results
-    from megadetector.utils import path_utils
-
-    render_animals_only = False
-
-    options = PostProcessingOptions()
-    options.image_base_dir = image_folder
-    options.include_almost_detections = True
-    options.num_images_to_sample = None
-    options.confidence_threshold = 0.2
-    options.almost_detection_confidence_threshold = options.confidence_threshold - 0.05
-    options.ground_truth_json_file = None
-    options.separate_detections_by_category = True
-    options.sample_seed = 0
-    options.max_figures_per_html_file = 5000
-
-    options.parallelize_rendering = True
-    options.parallelize_rendering_n_cores = 10
-    options.parallelize_rendering_with_threads = True
-    options.sort_classification_results_by_count = True
-
-    if render_animals_only:
-        # Omit some pages from the output, useful when animals are rare
-        options.rendering_bypass_sets = ['detections_person','detections_vehicle',
-                                         'detections_person_vehicle','non_detections']
-
-    output_base = r'g:\temp\preview' + '_' + detector_source
-    if render_animals_only:
-        output_base = output_base + '_render_animals_only'
-    os.makedirs(output_base, exist_ok=True)
-
-    print('Writing preview to {}'.format(output_base))
-
-    options.md_results_file = md_results_file
-    options.output_dir = output_base
-    ppresults = process_batch_results(options)
-    html_output_file = ppresults.output_html_file
-
-    path_utils.open_file(html_output_file,attempt_to_open_in_wsl_host=True,browser_name='chrome')
-    # import clipboard; clipboard.copy(html_output_file)