megadetector 5.0.25__py3-none-any.whl → 5.0.27__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- megadetector/data_management/cct_json_utils.py +15 -2
- megadetector/data_management/coco_to_yolo.py +53 -31
- megadetector/data_management/databases/combine_coco_camera_traps_files.py +7 -3
- megadetector/data_management/databases/integrity_check_json_db.py +2 -2
- megadetector/data_management/lila/generate_lila_per_image_labels.py +2 -2
- megadetector/data_management/lila/test_lila_metadata_urls.py +21 -10
- megadetector/data_management/remap_coco_categories.py +60 -11
- megadetector/data_management/yolo_to_coco.py +45 -15
- megadetector/postprocessing/classification_postprocessing.py +788 -524
- megadetector/postprocessing/create_crop_folder.py +95 -33
- megadetector/postprocessing/load_api_results.py +4 -1
- megadetector/postprocessing/md_to_coco.py +1 -1
- megadetector/postprocessing/postprocess_batch_results.py +156 -42
- megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +3 -8
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +2 -2
- megadetector/postprocessing/separate_detections_into_folders.py +20 -4
- megadetector/postprocessing/subset_json_detector_output.py +180 -15
- megadetector/postprocessing/validate_batch_results.py +13 -5
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +6 -6
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +3 -58
- megadetector/taxonomy_mapping/species_lookup.py +45 -2
- megadetector/utils/ct_utils.py +4 -2
- megadetector/utils/directory_listing.py +1 -1
- megadetector/utils/md_tests.py +2 -1
- megadetector/utils/path_utils.py +308 -19
- megadetector/utils/wi_utils.py +363 -186
- megadetector/visualization/visualization_utils.py +2 -1
- megadetector/visualization/visualize_db.py +1 -1
- megadetector/visualization/visualize_detector_output.py +1 -4
- {megadetector-5.0.25.dist-info → megadetector-5.0.27.dist-info}/METADATA +4 -3
- {megadetector-5.0.25.dist-info → megadetector-5.0.27.dist-info}/RECORD +34 -34
- {megadetector-5.0.25.dist-info → megadetector-5.0.27.dist-info}/WHEEL +1 -1
- {megadetector-5.0.25.dist-info → megadetector-5.0.27.dist-info/licenses}/LICENSE +0 -0
- {megadetector-5.0.25.dist-info → megadetector-5.0.27.dist-info}/top_level.txt +0 -0
megadetector/utils/wi_utils.py
CHANGED
@@ -84,6 +84,105 @@ def is_valid_taxonomy_string(s):
     return isinstance(s,str) and (len(s.split(';')) == 5) and (s == s.lower())
 
 
+def clean_taxonomy_string(s):
+    """
+    If [s] is a seven-token prediction string, trim the GUID and common name to produce
+    a "clean" taxonomy string. Else if [s] is a five-token string, return it. Else error.
+
+    Args:
+        s (str): the seven- or five-token taxonomy/prediction string to clean
+
+    Returns:
+        str: the five-token taxonomy string
+    """
+
+    if is_valid_taxonomy_string(s):
+        return s
+    elif is_valid_prediction_string(s):
+        tokens = s.split(';')
+        assert len(tokens) == 7
+        return ';'.join(tokens[1:-1])
+    else:
+        raise ValueError('Invalid taxonomy string')
+
+
+taxonomy_level_names = \
+    ['non-taxonomic','kingdom','phylum','class','order','family','genus','species','subspecies']
+
+
+def taxonomy_level_to_string(k):
+    """
+    Maps taxonomy level indices (1 for kingdom, 2 for phylum, etc.) to strings.
+
+    Args:
+        k (int): taxonomy level index
+
+    Returns:
+        str: taxonomy level string
+    """
+
+    assert k >= 0 and k < len(taxonomy_level_names), \
+        'Illegal taxonomy level index {}'.format(k)
+
+    return taxonomy_level_names[k]
+
+
+def taxonomy_level_string_to_index(s):
+    """
+    Maps strings ('kingdom', 'species', etc.) to level indices.
+
+    Args:
+        s (str): taxonomy level string
+
+    Returns:
+        int: taxonomy level index
+    """
+
+    assert s in taxonomy_level_names, 'Unrecognized taxonomy level string {}'.format(s)
+    return taxonomy_level_names.index(s)
+
+
+def taxonomy_level_index(s):
+    """
+    Returns the taxonomy level up to which [s] is defined (0 for non-taxonomic, 1 for kingdom,
+    2 for phylum, etc.). Empty strings and non-taxonomic strings are treated as level 0. 1 and 2
+    will never be returned; "animal" doesn't look like other taxonomic strings, so here we treat
+    it as non-taxonomic.
+
+    Args:
+        s (str): 5-token or 7-token taxonomy string
+
+    Returns:
+        int: taxonomy level
+    """
+
+    if s in non_taxonomic_prediction_strings or s in non_taxonomic_prediction_short_strings:
+        return 0
+
+    tokens = s.split(';')
+    assert len(tokens) in (5,7)
+
+    if len(tokens) == 7:
+        tokens = tokens[1:-1]
+
+    if len(tokens[0]) == 0:
+        return 0
+    # WI taxonomy strings start at class, so we'll never return 1 (kingdom) or 2 (phylum)
+    elif len(tokens[1]) == 0:
+        return 3
+    elif len(tokens[2]) == 0:
+        return 4
+    elif len(tokens[3]) == 0:
+        return 5
+    elif len(tokens[4]) == 0:
+        return 6
+    # Subspecies are delimited with a space
+    elif ' ' not in tokens[4]:
+        return 7
+    else:
+        return 8
+
+
 def wi_result_to_prediction_string(r):
     """
     Convert the dict [r] - typically loaded from a row in a downloaded .csv file - to
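As a quick illustration of the string formats these new helpers expect, here is a minimal sketch; the GUID and taxa below are made-up placeholders, not values from the WI taxonomy:

from megadetector.utils.wi_utils import clean_taxonomy_string, taxonomy_level_index

# A seven-token prediction string: GUID;class;order;family;genus;species;common name.
# The GUID here is a placeholder.
pred = ('aaaaaaaa-0000-0000-0000-000000000000;'
        'mammalia;cetartiodactyla;cervidae;odocoileus;virginianus;white-tailed deer')

# Trimming the GUID and common name yields the five-token taxonomy string...
assert clean_taxonomy_string(pred) == \
    'mammalia;cetartiodactyla;cervidae;odocoileus;virginianus'

# ...and the string is defined through its species token, i.e. level 7 ('species')
assert taxonomy_level_index(pred) == 7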
@@ -500,6 +599,10 @@ non_taxonomic_prediction_strings = [blank_prediction_string,
                                     animal_prediction_string,
                                     vehicle_prediction_string]
 
+non_taxonomic_prediction_short_strings = [';'.join(s.split(';')[1:-1]) for s in \
+                                          non_taxonomic_prediction_strings]
+
+
 process_cv_response_url = 'https://placeholder'
 
 
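The new non_taxonomic_prediction_short_strings global just applies the same GUID/common-name trimming to each of the non-taxonomic prediction strings; a standalone sketch of the transformation, using a placeholder string:

# Placeholder seven-token string with empty taxonomy tokens, e.g. a "blank" prediction
s = 'aaaaaaaa-0000-0000-0000-000000000000;;;;;;blank'

# Drop the leading GUID and the trailing common name, keeping the five middle tokens
short = ';'.join(s.split(';')[1:-1])
assert short == ';;;;'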
@@ -912,6 +1015,19 @@ def is_human_classification(prediction_string):
         bool: whether this string corresponds to a human category
     """
     return prediction_string == human_prediction_string or 'homo;sapiens' in prediction_string
+
+
+def is_vehicle_classification(prediction_string):
+    """
+    Determines whether the input string represents a vehicle classification.
+
+    Args:
+        prediction_string (str): a string in the semicolon-delimited prediction string format
+
+    Returns:
+        bool: whether this string corresponds to the vehicle category
+    """
+    return prediction_string == vehicle_prediction_string
 
 
 def is_animal_classification(prediction_string):
@@ -939,17 +1055,114 @@ def is_animal_classification(prediction_string):
         return True
 
 
+def generate_whole_image_detections_for_classifications(classifications_json_file,
+                                                        detections_json_file,
+                                                        ensemble_json_file=None,
+                                                        ignore_blank_classifications=True):
+    """
+    Given a set of classification results that were likely run on already-cropped
+    images, generate a file of [fake] detections in which each image is covered
+    by a single whole-image detection.
+
+    Args:
+        classifications_json_file (str): SpeciesNet-formatted file containing classifications
+        detections_json_file (str): SpeciesNet-formatted file to write with detections
+        ensemble_json_file (str, optional): SpeciesNet-formatted file to write with detections
+            and classifications
+        ignore_blank_classifications (bool, optional): use non-top classifications when
+            the top classification is "blank" or "no CV result"
+
+    Returns:
+        dict: the contents of [detections_json_file]
+    """
+
+    with open(classifications_json_file,'r') as f:
+        classification_results = json.load(f)
+    predictions = classification_results['predictions']
+
+    output_predictions = []
+    ensemble_predictions = []
+
+    # prediction = predictions[0]
+    for prediction in predictions:
+
+        output_prediction = {}
+        output_prediction['filepath'] = prediction['filepath']
+        i_score = 0
+        if ignore_blank_classifications:
+            while (prediction['classifications']['classes'][i_score] in \
+                   (blank_prediction_string,no_cv_result_prediction_string)):
+                i_score += 1
+        top_classification = prediction['classifications']['classes'][i_score]
+        top_classification_score = prediction['classifications']['scores'][i_score]
+        if is_animal_classification(top_classification):
+            category_name = 'animal'
+        elif is_human_classification(top_classification):
+            category_name = 'human'
+        else:
+            category_name = 'vehicle'
+
+        if category_name == 'human':
+            md_category_name = 'person'
+        else:
+            md_category_name = category_name
+
+        output_detection = {}
+        output_detection['label'] = category_name
+        output_detection['category'] = md_category_name_to_id[md_category_name]
+        output_detection['conf'] = 1.0
+        output_detection['bbox'] = [0.0, 0.0, 1.0, 1.0]
+        output_prediction['detections'] = [output_detection]
+        output_predictions.append(output_prediction)
+
+        ensemble_prediction = {}
+        ensemble_prediction['filepath'] = prediction['filepath']
+        ensemble_prediction['detections'] = [output_detection]
+        ensemble_prediction['prediction'] = top_classification
+        ensemble_prediction['prediction_score'] = top_classification_score
+        ensemble_prediction['prediction_source'] = 'fake_ensemble_file_utility'
+        ensemble_prediction['classifications'] = prediction['classifications']
+        ensemble_predictions.append(ensemble_prediction)
+
+    # ...for each image
+
+    ## Write output
+
+    if ensemble_json_file is not None:
+
+        ensemble_output_data = {'predictions':ensemble_predictions}
+        with open(ensemble_json_file,'w') as f:
+            json.dump(ensemble_output_data,f,indent=1)
+        _ = validate_predictions_file(ensemble_json_file)
+
+    output_data = {'predictions':output_predictions}
+    with open(detections_json_file,'w') as f:
+        json.dump(output_data,f,indent=1)
+    return validate_predictions_file(detections_json_file)
+
+# ...def generate_whole_image_detections_for_classifications(...)
+
+
 def generate_md_results_from_predictions_json(predictions_json_file,
                                               md_results_file,
                                               base_folder=None,
-                                              max_decimals=5
+                                              max_decimals=5,
+                                              convert_human_to_person=True):
     """
-    Generate an MD-formatted .json file from a predictions.json file
-    MD results files use relative paths, and predictions.json
-    this function optionally removes the leading string
+    Generate an MD-formatted .json file from a predictions.json file, generated by the
+    SpeciesNet ensemble. Typically, MD results files use relative paths, and predictions.json
+    files use absolute paths, so this function optionally removes the leading string
+    [base_folder] from all file names.
+
+    Currently just applies the top classification category to every detection. If the top
+    classification is "blank", writes an empty detection list.
+
+    Uses the classification from the "prediction" field if it's available, otherwise
+    uses the "classifications" field.
 
-
-
+    When using the "prediction" field, records the top class in the "classifications" field to
+    a field in each image called "top_classification_common_name". This is often different
+    from the value of the "prediction" field.
 
     speciesnet_to_md.py is a command-line driver for this function.
 
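A usage sketch for the new whole-image-detection utility defined above; the file names are hypothetical, and the input is assumed to be an existing SpeciesNet-formatted classification file:

from megadetector.utils.wi_utils import \
    generate_whole_image_detections_for_classifications

# Hypothetical paths: the first file must exist, the other two are written
detections = generate_whole_image_detections_for_classifications(
    classifications_json_file='classifications.json',
    detections_json_file='detections.json',
    ensemble_json_file='ensemble.json',
    ignore_blank_classifications=True)

# Per the function above, every image gets exactly one full-frame pseudo-detection
for p in detections['predictions']:
    assert p['detections'][0]['bbox'] == [0.0, 0.0, 1.0, 1.0]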
@@ -960,6 +1173,9 @@ def generate_md_results_from_predictions_json(predictions_json_file,
             predictions.json file
         max_decimals (int, optional): number of decimal places to which we should round
             all values
+        convert_human_to_person (bool, optional): WI predictions.json files sometimes use the
+            detection category "human"; MD files usually use "person". If True, switches "human"
+            to "person".
     """
 
     # Read predictions file
@@ -1040,7 +1256,8 @@ def generate_md_results_from_predictions_json(predictions_json_file,
     # ...if detections are present
 
     class_to_assign = None
-    class_confidence = None
+    class_confidence = None
+    top_classification_common_name = None
 
     if 'classifications' in im_in:
 
@@ -1050,8 +1267,15 @@ def generate_md_results_from_predictions_json(predictions_json_file,
         class_to_assign = classifications['classes'][0]
         class_confidence = classifications['scores'][0]
 
+        tokens = class_to_assign.split(';')
+        assert len(tokens) == 7
+        top_classification_common_name = tokens[-1]
+        if len(top_classification_common_name) == 0:
+            top_classification_common_name = 'undefined'
+
     if 'prediction' in im_in:
 
+        im_out['top_classification_common_name'] = top_classification_common_name
         class_to_assign = im_in['prediction']
         class_confidence = im_in['prediction_score']
@@ -1111,8 +1335,7 @@ def generate_md_results_from_predictions_json(predictions_json_file,
         print('Warning: you supplied {} as the base folder, but I made zero replacements'.format(
             base_folder))
 
-    # Fix the 'unknown' category
-
+    # Fix the 'unknown' category
     if len(all_unknown_detections) > 0:
 
         max_detection_category_id = max([int(x) for x in detection_category_id_to_name.keys()])
@@ -1144,6 +1367,11 @@ def generate_md_results_from_predictions_json(predictions_json_file,
     info['format_version'] = 1.4
     info['detector'] = 'converted_from_predictions_json'
 
+    if convert_human_to_person:
+        for k in detection_categories_out.keys():
+            if detection_categories_out[k] == 'human':
+                detection_categories_out[k] = 'person'
+
     output_dict = {}
     output_dict['info'] = info
     output_dict['detection_categories'] = detection_categories_out
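The convert_human_to_person behavior added above is a straightforward value remap on the output detection categories; a standalone sketch of the same logic, with an invented category table:

# Invented example table; the real table is built from the predictions file
detection_categories_out = {'1': 'animal', '2': 'human', '3': 'vehicle'}

# Same remap as above: keys are untouched, only the 'human' value changes
for k in detection_categories_out.keys():
    if detection_categories_out[k] == 'human':
        detection_categories_out[k] = 'person'

assert detection_categories_out == {'1': 'animal', '2': 'person', '3': 'vehicle'}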
@@ -1223,6 +1451,7 @@ def generate_predictions_json_from_md_results(md_results_file,
 
 # ...def generate_predictions_json_from_md_results(...)
 
+
 default_tokens_to_ignore = ['$RECYCLE.BIN']
 
 def generate_instances_json_from_folder(folder,
@@ -1403,7 +1632,7 @@ def validate_predictions_file(fn,instances=None,verbose=True):
             failures.append(im)
 
     if verbose:
-        print('Read
+        print('Read predictions for {} images, with {} failure(s)'.format(
             len(d['predictions']),len(failures)))
 
     if instances is not None:
@@ -1454,6 +1683,7 @@ def find_geofence_adjustments(ensemble_json_file,use_latin_names=False):
         descending order by count.
     """
 
+    # Load and validate ensemble results
    ensemble_results = validate_predictions_file(ensemble_json_file)
 
     assert isinstance(ensemble_results,dict)
@@ -1510,14 +1740,56 @@ def find_geofence_adjustments(ensemble_json_file,use_latin_names=False):
 # ...def find_geofence_adjustments(...)
 
 
+def generate_geofence_adjustment_html_summary(rollup_pair_to_count,min_count=10):
+    """
+    Given a set of geofence rollups, likely generated by find_geofence_adjustments,
+    generate an HTML summary of the changes made by geofencing. The resulting HTML
+    is wrapped in <div>, but not, for example, in <html> or <body>.
+
+    Args:
+        rollup_pair_to_count (dict): changes made by geofencing, see
+            find_geofence_adjustments for details
+        min_count (int, optional): minimum number of changes a pair needs in order
+            to be included in the report.
+    """
+
+    geofence_footer = ''
+
+    # Restrict to the list of taxa that were impacted by geofencing
+    rollup_pair_to_count = \
+        {key: value for key, value in rollup_pair_to_count.items() if value >= min_count}
+
+    # rollup_pair_to_count is sorted in descending order by count
+    assert is_list_sorted(list(rollup_pair_to_count.values()),reverse=True)
+
+    if len(rollup_pair_to_count) > 0:
+
+        geofence_footer = \
+            '<h3>Geofence changes that occurred more than {} times</h3>\n'.format(min_count)
+        geofence_footer += '<div class="contentdiv">\n'
+
+        print('\nRollup changes with count > {}:'.format(min_count))
+        for rollup_pair in rollup_pair_to_count.keys():
+            count = rollup_pair_to_count[rollup_pair]
+            rollup_pair_s = rollup_pair.replace(',',' --> ')
+            print('{}: {}'.format(rollup_pair_s,count))
+            rollup_pair_html = rollup_pair.replace(',',' → ')
+            geofence_footer += '{} ({})<br/>\n'.format(rollup_pair_html,count)
+
+        geofence_footer += '</div>\n'
+
+    return geofence_footer
+
+# ...def generate_geofence_adjustment_html_summary(...)
+
+
 #%% Module-level globals related to taxonomy mapping and geofencing
 
 # This maps a taxonomy string (e.g. mammalia;cetartiodactyla;cervidae;odocoileus;virginianus) to
 # a dict with keys taxon_id, common_name, kingdom, phylum, class, order, family, genus, species
 taxonomy_string_to_taxonomy_info = None
 
-# Maps a binomial name (
-# described above.
+# Maps a binomial name (one, two, or three ws-delimited tokens) to the same dict described above.
 binomial_name_to_taxonomy_info = None
 
 # Maps a common name to the same dict described above
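A sketch of how the new HTML summary composes with find_geofence_adjustments; the ensemble file name is hypothetical:

from megadetector.utils.wi_utils import find_geofence_adjustments
from megadetector.utils.wi_utils import generate_geofence_adjustment_html_summary

# Hypothetical input: a SpeciesNet ensemble output file
rollup_pair_to_count = find_geofence_adjustments('ensemble-output.json')

# Returns an HTML fragment wrapped in <div> (or '' if no pair reaches min_count)
html_fragment = generate_geofence_adjustment_html_summary(rollup_pair_to_count,
                                                          min_count=10)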
@@ -1627,17 +1899,28 @@ def initialize_taxonomy_info(taxonomy_file,force_init=False,encoding='cp1252'):
             common_name_to_taxonomy_info[taxon_info['common_name']] = taxon_info
 
         taxonomy_string_to_taxonomy_info[taxonomy_string] = taxon_info
-
+
+        binomial_name = None
+        if len(tokens[4]) > 0 and len(tokens[5]) > 0:
+            # strip(), but don't remove spaces from the species name;
+            # subspecies are separated with a space, e.g. canis;lupus dingo
+            binomial_name = tokens[4].strip() + ' ' + tokens[5].strip()
+        elif len(tokens[4]) > 0:
+            binomial_name = tokens[4].strip()
+        elif len(tokens[3]) > 0:
+            binomial_name = tokens[3].strip()
+        elif len(tokens[2]) > 0:
+            binomial_name = tokens[2].strip()
+        elif len(tokens[1]) > 0:
+            binomial_name = tokens[1].strip()
+        if binomial_name is None:
             # print('Warning: no binomial name for {}'.format(taxonomy_string))
             pass
         else:
-            # strip(), but don't remove spaces from the species name;
-            # subspecies are separated with a space, e.g. canis;lupus dingo
-            binomial_name = tokens[4].strip() + ' ' + tokens[5].strip()
             binomial_name_to_taxonomy_info[binomial_name] = taxon_info
 
-    print('Created {} records in taxonomy_string_to_taxonomy_info'.format(
-
+    print('Created {} records in taxonomy_string_to_taxonomy_info'.format(len(taxonomy_string_to_taxonomy_info)))
+    print('Created {} records in common_name_to_taxonomy_info'.format(len(common_name_to_taxonomy_info)))
 
 # ...def initialize_taxonomy_info(...)
@@ -1741,7 +2024,7 @@ def generate_csv_rows_for_species(species_string,
     and blocking a country.
 
     Args:
-        species_string (str): string in semicolon-delimited WI taxonomy format
+        species_string (str): five-token string in semicolon-delimited WI taxonomy format
         allow_countries (optional, list or str): three-letter country codes, list of
             country codes, or comma-separated list of country codes to allow
         block_countries (optional, list or str): three-letter country codes, list of
@@ -1849,23 +2132,21 @@ def initialize_geofencing(geofencing_file,country_code_file,force_init=False):
 
         species_rules = taxonomy_string_to_geofencing_rules[species_string]
 
-        assert len(species_rules.keys()) == 1
-        rule_type = list(species_rules.keys())[0]
-        assert rule_type in ('allow','block')
-
-        all_country_rules_this_species = species_rules[rule_type]
-        for country_code in all_country_rules_this_species.keys():
-
-            assert country_code in country_code_to_country
+        if len(species_rules.keys()) > 1:
+            print('Warning: taxon {} has both allow and block rules'.format(species_string))
 
+        for rule_type in species_rules.keys():
+
+            assert rule_type in ('allow','block')
+            all_country_rules_this_species = species_rules[rule_type]
 
+            for country_code in all_country_rules_this_species.keys():
+                assert country_code in country_code_to_country
+                region_rules = all_country_rules_this_species[country_code]
+                # Right now we only have regional rules for the USA; these may be part of
+                # allow or block rules.
+                if len(region_rules) > 0:
+                    assert country_code == 'USA'
 
         # ...for each species
 
@@ -1875,7 +2156,7 @@ def initialize_geofencing(geofencing_file,country_code_file,force_init=False):
 def _species_string_to_canonical_species_string(species):
     """
     Convert a string that may be a 5-token species string, a binomial name,
-    or a common name into a 5-token species string.
+    or a common name into a 5-token species string, using taxonomic lookup.
     """
 
     global taxonomy_string_to_taxonomy_info
@@ -1894,14 +2175,14 @@ def _species_string_to_canonical_species_string(species):
     # If this is already a taxonomy string...
     if len(species.split(';')) == 5:
         pass
-    # If this is a binomial name (which may include a subspecies)...
-    elif (len(species.split(' ')) in (2,3)) and (species in binomial_name_to_taxonomy_info):
-        taxonomy_info = binomial_name_to_taxonomy_info[species]
-        taxonomy_string = taxonomy_info_to_taxonomy_string(taxonomy_info)
     # If this is a common name...
     elif species in common_name_to_taxonomy_info:
         taxonomy_info = common_name_to_taxonomy_info[species]
         taxonomy_string = taxonomy_info_to_taxonomy_string(taxonomy_info)
+    # If this is a binomial name...
+    elif (species in binomial_name_to_taxonomy_info):
+        taxonomy_info = binomial_name_to_taxonomy_info[species]
+        taxonomy_string = taxonomy_info_to_taxonomy_string(taxonomy_info)
     else:
         raise ValueError('Could not find taxonomic information for {}'.format(species))
 
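Note that the reordering above changes lookup precedence: a string present in both tables now resolves via its common name first. A contrived sketch of that precedence, with invented table entries:

# Invented entries; the real tables map names to taxonomy-info dicts
common_name_to_taxonomy_info = {'dingo': 'common-name-record'}
binomial_name_to_taxonomy_info = {'dingo': 'binomial-record'}

species = 'dingo'
if species in common_name_to_taxonomy_info:        # checked first after the reorder
    info = common_name_to_taxonomy_info[species]
elif species in binomial_name_to_taxonomy_info:
    info = binomial_name_to_taxonomy_info[species]

assert info == 'common-name-record'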
@@ -1966,29 +2247,34 @@ def species_allowed_in_country(species,country,state=None,return_status=False):
     allowed_countries = []
     blocked_countries = []
 
-    rule_type
-
-    if
-        allowed_countries = list(geofencing_rules_this_species['allow'])
-    else:
-        assert rule_type == 'block'
+    rule_types_this_species = list(geofencing_rules_this_species.keys())
+    for rule_type in rule_types_this_species:
+        assert rule_type in ('allow','block')
+
+    if 'block' in rule_types_this_species:
         blocked_countries = list(geofencing_rules_this_species['block'])
+    if 'allow' in rule_types_this_species:
+        allowed_countries = list(geofencing_rules_this_species['allow'])
 
     status = None
 
     # The convention is that block rules win over allow rules
     if country_code in blocked_countries:
-
+        if country_code in allowed_countries:
+            status = 'blocked_over_allow'
+        else:
+            status = 'blocked'
     elif country_code in allowed_countries:
         status = 'allowed'
-
+    elif len(allowed_countries) > 0:
         # The convention is that if allow rules exist, any country not on that list
         # is blocked.
-
-
-
+        status = 'block_not_on_country_allow_list'
+    else:
+        # Only block rules exist for this species, and they don't include this country
+        assert len(blocked_countries) > 0
+        status = 'allow_not_on_block_list'
+
     # Now let's see whether we have to deal with any regional rules
     if state is None:
 
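The rewritten status logic above amounts to a small decision table: block beats allow, an allow list implicitly blocks every other country, and a block list alone implicitly allows every other country. A standalone sketch, with hypothetical rule sets:

def geofence_status(country_code, allowed_countries, blocked_countries):
    # Standalone restatement of the status logic in species_allowed_in_country
    if country_code in blocked_countries:
        return 'blocked_over_allow' if country_code in allowed_countries else 'blocked'
    elif country_code in allowed_countries:
        return 'allowed'
    elif len(allowed_countries) > 0:
        return 'block_not_on_country_allow_list'
    else:
        return 'allow_not_on_block_list'

assert geofence_status('USA', ['USA'], ['USA']) == 'blocked_over_allow'
assert geofence_status('MEX', ['USA'], []) == 'block_not_on_country_allow_list'
assert geofence_status('MEX', [], ['USA']) == 'allow_not_on_block_list'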
@@ -2441,16 +2727,18 @@ if False:
 
     from megadetector.utils.wi_utils import taxonomy_string_to_geofencing_rules # noqa
     from megadetector.utils.wi_utils import taxonomy_string_to_taxonomy_info # noqa
+    from megadetector.utils.wi_utils import common_name_to_taxonomy_info # noqa
+    from megadetector.utils.wi_utils import binomial_name_to_taxonomy_info # noqa
 
-
-    country_code_file =
+    model_base = os.path.expanduser('~/models/speciesnet')
+    geofencing_file = os.path.join(model_base,'crop','geofence_release.2025.02.27.0702.json')
+    country_code_file = os.path.join(model_base,'country-codes.csv')
     # encoding = 'cp1252'; taxonomy_file = r'g:\temp\taxonomy_mapping-' + encoding + '.json'
-    encoding = None; taxonomy_file =
+    encoding = None; taxonomy_file = os.path.join(model_base,'taxonomy_mapping.json')
 
     initialize_geofencing(geofencing_file, country_code_file, force_init=True)
     initialize_taxonomy_info(taxonomy_file, force_init=True, encoding=encoding)
-
+
 
 #%% Test driver for geofence_fixes.csv function
 
@@ -2458,21 +2746,26 @@ if False:
     species = 'dingo'
     species_string = _species_string_to_canonical_species_string(species)
     rows = _generate_csv_rows_to_block_all_countries_except(species_string,block_except_list)
-
-    import clipboard; clipboard.copy('\n'.join(rows))
+
+    # import clipboard; clipboard.copy('\n'.join(rows))
+    print(rows)
 
     #%%
 
-
-
+    taxon_name = 'hippopotamus amphibius'
+    taxonomy_info = binomial_name_to_taxonomy_info[taxon_name]
+    taxonomy_string_short = taxonomy_info_to_taxonomy_string(taxonomy_info)
+    assert len(taxonomy_string_short.split(';')) == 5
+
+    generate_csv_rows_for_species(species_string=taxonomy_string_short,
+                                  allow_countries=['COL'],
                                   block_countries=None,
                                   allow_states=None,
                                   block_states=None,
                                   blockexcept_countries=None)
-
-
-    _generate_csv_rows_to_block_all_countries_except(species_string,'AUS')
+
+    # _generate_csv_rows_to_block_all_countries_except(species_string,'AUS')
 
 
 #%% Test the effects of geofence changes
@@ -2482,51 +2775,18 @@ if False:
     species_allowed_in_country(species,country,state=None,return_status=False)
 
 
-    #%%
+    #%% Geofencing lookups
 
-
-
-
-
-    _ = generate_instances_json_from_folder(folder=r'g:\temp\water-hole',
-                                            country='NAM',
-                                            lat=None,
-                                            lon=None,
-                                            output_file=instances_file,
-                                            filename_replacements={'g:/temp':'/mnt/g/temp'})
-
-    # from megadetector.utils.path_utils import open_file; open_file(instances_file)
-
-
-    #%% MD --> prediction conversion test
+    # This can be a latin or common name
+    species = 'hippopotamidae'
+    # print(common_name_to_taxonomy_info[species])
 
-
-    md_results_file = r'G:\temp\md-test-images\mdv5a.relpaths.json'
-    predictions_json_file = r'\\wsl$\Ubuntu\home\dmorris\tmp\speciesnet-tests\mdv5a.abspaths.predictions-format.json'
-    generate_predictions_json_from_md_results(md_results_file,predictions_json_file,base_folder=
-        '/home/dmorris/tmp/md-test-images/')
-
-    from megadetector.utils.wi_utils import generate_predictions_json_from_md_results # noqa
-    md_results_file = r"G:\temp\water-hole\md_results.json"
-    predictions_json_file = r"G:\temp\water-hole\md_results-prediction_format.json"
-    generate_predictions_json_from_md_results(md_results_file,predictions_json_file,base_folder=
-        '/mnt/g/temp/water-hole')
-
-
-    #%% Geofencing tests
-
-    species = 'didelphis marsupialis'
-    print(binomial_name_to_taxonomy_info[species])
-    country = 'Guatemala'
-    assert species_allowed_in_country(species, country)
-
-    species = 'virginia opossum'
-    print(common_name_to_taxonomy_info[species])
+    # This can be a name or country code
     country = 'USA'
-
+    print(species_allowed_in_country(species, country))
 
-    #%%
+    #%% Bulk geofence lookups
 
     if True:
 
@@ -2606,86 +2866,3 @@ if False:
         if state is not None:
             state_string = ' ({})'.format(state)
         print('{} ({}) for {}{}: {}'.format(taxonomy_info['common_name'],species,country,state_string,allowed))
-
-
-    #%% Test conversion from predictons.json to MD format
-
-    import os # noqa
-    from megadetector.utils.wi_utils import generate_md_results_from_predictions_json # noqa
-
-    # detector_source = 'speciesnet'
-    detector_source = 'md'
-
-    if False:
-        image_folder = r'g:\temp\md-test-images'
-        base_folder = '/home/dmorris/tmp/md-test-images/'
-        if detector_source == 'speciesnet':
-            predictions_json_file = r"\\wsl$\Ubuntu\home\dmorris\tmp\speciesnet-tests\ensemble-output.json"
-            md_results_file = r"\\wsl$\Ubuntu\home\dmorris\tmp\speciesnet-tests\ensemble-output-md-format.json"
-        else:
-            assert detector_source == 'md'
-            predictions_json_file = r"\\wsl$\Ubuntu\home\dmorris\tmp\speciesnet-tests\ensemble-output-from-md-results.json"
-            md_results_file = r"\\wsl$\Ubuntu\home\dmorris\tmp\speciesnet-tests\ensemble-output-md-format-from-md-results.json"
-    else:
-        image_folder = r'g:\temp\water-hole'
-        base_folder = '/mnt/g/temp/water-hole/'
-        if detector_source == 'speciesnet':
-            predictions_json_file = r'g:\temp\water-hole\ensemble-output.json'
-            md_results_file = r'g:\temp\water-hole\ensemble-output.md_format.json'
-        else:
-            assert detector_source == 'md'
-            predictions_json_file = r'g:\temp\water-hole\ensemble-output-md.json'
-            md_results_file = r'g:\temp\water-hole\ensemble-output-md.md_format.json'
-
-    generate_md_results_from_predictions_json(predictions_json_file=predictions_json_file,
-                                              md_results_file=md_results_file,
-                                              base_folder=base_folder)
-
-    # from megadetector.utils.path_utils import open_file; open_file(md_results_file)
-
-    assert os.path.isdir(image_folder)
-
-
-    #%% Preview
-
-    from megadetector.postprocessing.postprocess_batch_results import \
-        PostProcessingOptions, process_batch_results
-    from megadetector.utils import path_utils
-
-    render_animals_only = False
-
-    options = PostProcessingOptions()
-    options.image_base_dir = image_folder
-    options.include_almost_detections = True
-    options.num_images_to_sample = None
-    options.confidence_threshold = 0.2
-    options.almost_detection_confidence_threshold = options.confidence_threshold - 0.05
-    options.ground_truth_json_file = None
-    options.separate_detections_by_category = True
-    options.sample_seed = 0
-    options.max_figures_per_html_file = 5000
-
-    options.parallelize_rendering = True
-    options.parallelize_rendering_n_cores = 10
-    options.parallelize_rendering_with_threads = True
-    options.sort_classification_results_by_count = True
-
-    if render_animals_only:
-        # Omit some pages from the output, useful when animals are rare
-        options.rendering_bypass_sets = ['detections_person','detections_vehicle',
-                                         'detections_person_vehicle','non_detections']
-
-    output_base = r'g:\temp\preview' + '_' + detector_source
-    if render_animals_only:
-        output_base = output_base + '_render_animals_only'
-    os.makedirs(output_base, exist_ok=True)
-
-    print('Writing preview to {}'.format(output_base))
-
-    options.md_results_file = md_results_file
-    options.output_dir = output_base
-    ppresults = process_batch_results(options)
-    html_output_file = ppresults.output_html_file
-
-    path_utils.open_file(html_output_file,attempt_to_open_in_wsl_host=True,browser_name='chrome')
-    # import clipboard; clipboard.copy(html_output_file)