megadetector 5.0.24__py3-none-any.whl → 5.0.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (41)
  1. megadetector/data_management/cct_json_utils.py +15 -2
  2. megadetector/data_management/coco_to_yolo.py +53 -31
  3. megadetector/data_management/databases/combine_coco_camera_traps_files.py +7 -3
  4. megadetector/data_management/databases/integrity_check_json_db.py +2 -2
  5. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +73 -69
  6. megadetector/data_management/lila/add_locations_to_nacti.py +114 -110
  7. megadetector/data_management/lila/generate_lila_per_image_labels.py +2 -2
  8. megadetector/data_management/lila/test_lila_metadata_urls.py +21 -10
  9. megadetector/data_management/remap_coco_categories.py +60 -11
  10. megadetector/data_management/{wi_to_md.py → speciesnet_to_md.py} +2 -2
  11. megadetector/data_management/yolo_to_coco.py +45 -15
  12. megadetector/detection/run_detector.py +1 -0
  13. megadetector/detection/run_detector_batch.py +5 -4
  14. megadetector/postprocessing/classification_postprocessing.py +788 -524
  15. megadetector/postprocessing/compare_batch_results.py +176 -9
  16. megadetector/postprocessing/create_crop_folder.py +420 -0
  17. megadetector/postprocessing/load_api_results.py +4 -1
  18. megadetector/postprocessing/md_to_coco.py +1 -1
  19. megadetector/postprocessing/postprocess_batch_results.py +158 -44
  20. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +3 -8
  21. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +2 -2
  22. megadetector/postprocessing/separate_detections_into_folders.py +20 -4
  23. megadetector/postprocessing/subset_json_detector_output.py +180 -15
  24. megadetector/postprocessing/validate_batch_results.py +13 -5
  25. megadetector/taxonomy_mapping/map_new_lila_datasets.py +6 -6
  26. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +3 -58
  27. megadetector/taxonomy_mapping/species_lookup.py +45 -2
  28. megadetector/utils/ct_utils.py +76 -3
  29. megadetector/utils/directory_listing.py +4 -4
  30. megadetector/utils/gpu_test.py +21 -3
  31. megadetector/utils/md_tests.py +142 -49
  32. megadetector/utils/path_utils.py +342 -19
  33. megadetector/utils/wi_utils.py +1286 -212
  34. megadetector/visualization/visualization_utils.py +16 -4
  35. megadetector/visualization/visualize_db.py +1 -1
  36. megadetector/visualization/visualize_detector_output.py +1 -4
  37. {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info}/METADATA +6 -3
  38. {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info}/RECORD +41 -40
  39. {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info}/WHEEL +1 -1
  40. {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info/licenses}/LICENSE +0 -0
  41. {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info}/top_level.txt +0 -0
@@ -27,8 +27,12 @@ from tqdm import tqdm
 
 from megadetector.utils.path_utils import insert_before_extension
 from megadetector.utils.ct_utils import split_list_into_n_chunks
+from megadetector.utils.ct_utils import round_floats_in_nested_dict
+from megadetector.utils.ct_utils import is_list_sorted
 from megadetector.utils.ct_utils import invert_dictionary
 from megadetector.utils.ct_utils import sort_list_of_dicts_by_key
+from megadetector.utils.ct_utils import sort_dictionary_by_value
+from megadetector.utils.ct_utils import sort_dictionary_by_key
 from megadetector.utils.path_utils import find_images
 from megadetector.postprocessing.validate_batch_results import \
     validate_batch_results, ValidateBatchResultsOptions
@@ -58,10 +62,127 @@ def is_valid_prediction_string(s):
     Returns:
         bool: True if this looks more or less like a WI prediction string
     """
-
+
+    # Note to self... don't get tempted to remove spaces here; spaces are used
+    # to indicate subspecies.
     return isinstance(s,str) and (len(s.split(';')) == 7) and (s == s.lower())
 
 
+def is_valid_taxonomy_string(s):
+    """
+    Determine whether [s] is a valid 5-token WI taxonomy string. Taxonomy strings look like:
+
+        'mammalia;rodentia;;;'
+        'mammalia;carnivora;canidae;canis;lupus dingo'
+
+    Args:
+        s (str): the string to be tested for validity
+
+    Returns:
+        bool: True if this looks more or less like a WI taxonomy string
+    """
+    return isinstance(s,str) and (len(s.split(';')) == 5) and (s == s.lower())
+
+
+def clean_taxonomy_string(s):
+    """
+    If [s] is a seven-token prediction string, trim the GUID and common name to produce
+    a "clean" taxonomy string. Else if [s] is a five-token string, return it. Else error.
+
+    Args:
+        s (str): the seven- or five-token taxonomy/prediction string to clean
+
+    Returns:
+        str: the five-token taxonomy string
+    """
+
+    if is_valid_taxonomy_string(s):
+        return s
+    elif is_valid_prediction_string(s):
+        tokens = s.split(';')
+        assert len(tokens) == 7
+        return ';'.join(tokens[1:-1])
+    else:
+        raise ValueError('Invalid taxonomy string')
+
+
+taxonomy_level_names = \
+    ['non-taxonomic','kingdom','phylum','class','order','family','genus','species','subspecies']
+
+
+def taxonomy_level_to_string(k):
+    """
+    Maps taxonomy level indices (1 for kingdom, 2 for phylum, etc.) to strings.
+
+    Args:
+        k (int): taxonomy level index
+
+    Returns:
+        str: taxonomy level string
+    """
+
+    assert k >= 0 and k < len(taxonomy_level_names), \
+        'Illegal taxonomy level index {}'.format(k)
+
+    return taxonomy_level_names[k]
+
+
+def taxonomy_level_string_to_index(s):
+    """
+    Maps strings ('kingdom', 'species', etc.) to level indices.
+
+    Args:
+        s (str): taxonomy level string
+
+    Returns:
+        int: taxonomy level index
+    """
+
+    assert s in taxonomy_level_names, 'Unrecognized taxonomy level string {}'.format(s)
+    return taxonomy_level_names.index(s)
+
+
+def taxonomy_level_index(s):
+    """
+    Returns the taxonomy level up to which [s] is defined (0 for non-taxonomic, 1 for kingdom,
+    2 for phylum, etc.). Empty strings and non-taxonomic strings are treated as level 0. 1 and 2
+    will never be returned; "animal" doesn't look like other taxonomic strings, so here we treat
+    it as non-taxonomic.
+
+    Args:
+        s (str): 5-token or 7-token taxonomy string
+
+    Returns:
+        int: taxonomy level
+    """
+
+    if s in non_taxonomic_prediction_strings or s in non_taxonomic_prediction_short_strings:
+        return 0
+
+    tokens = s.split(';')
+    assert len(tokens) in (5,7)
+
+    if len(tokens) == 7:
+        tokens = tokens[1:-1]
+
+    if len(tokens[0]) == 0:
+        return 0
+    # WI taxonomy strings start at class, so we'll never return 1 (kingdom) or 2 (phylum)
+    elif len(tokens[1]) == 0:
+        return 3
+    elif len(tokens[2]) == 0:
+        return 4
+    elif len(tokens[3]) == 0:
+        return 5
+    elif len(tokens[4]) == 0:
+        return 6
+    # Subspecies are delimited with a space
+    elif ' ' not in tokens[4]:
+        return 7
+    else:
+        return 8
+
+
 def wi_result_to_prediction_string(r):
     """
     Convert the dict [r] - typically loaded from a row in a downloaded .csv file - to
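
To make the new taxonomy helpers concrete, here is a minimal sketch (illustrative, not part of the diff) of how clean_taxonomy_string and taxonomy_level_index compose, assuming megadetector 5.0.26 is installed:

from megadetector.utils.wi_utils import clean_taxonomy_string, taxonomy_level_index

# A 7-token prediction string: GUID;class;order;family;genus;species;common name
pred = '90d950db-2106-4bd9-a4c1-777604c3eada;mammalia;rodentia;;;;rodent'

# Trimming the GUID and common name yields the 5-token core
assert clean_taxonomy_string(pred) == 'mammalia;rodentia;;;'

# This string is defined down to order (0 = non-taxonomic, 3 = class, 4 = order, ...)
assert taxonomy_level_index(pred) == 4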
@@ -469,10 +590,18 @@ sample_update_payload = {
 
 blank_prediction_string = 'f1856211-cfb7-4a5b-9158-c0f72fd09ee6;;;;;;blank'
 no_cv_result_prediction_string = 'f2efdae9-efb8-48fb-8a91-eccf79ab4ffb;no cv result;no cv result;no cv result;no cv result;no cv result;no cv result'
-rodent_prediction_string = '90d950db-2106-4bd9-a4c1-777604c3eada;mammalia;rodentia;;;;rodent'
-mammal_prediction_string = 'f2d233e3-80e3-433d-9687-e29ecc7a467a;mammalia;;;;;mammal'
 animal_prediction_string = '1f689929-883d-4dae-958c-3d57ab5b6c16;;;;;;animal'
 human_prediction_string = '990ae9dd-7a59-4344-afcb-1b7b21368000;mammalia;primates;hominidae;homo;sapiens;human'
+vehicle_prediction_string = 'e2895ed5-780b-48f6-8a11-9e27cb594511;;;;;;vehicle'
+
+non_taxonomic_prediction_strings = [blank_prediction_string,
+                                    no_cv_result_prediction_string,
+                                    animal_prediction_string,
+                                    vehicle_prediction_string]
+
+non_taxonomic_prediction_short_strings = [';'.join(s.split(';')[1:-1]) for s in \
+                                          non_taxonomic_prediction_strings]
+
 
 process_cv_response_url = 'https://placeholder'
 
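As a quick illustration (not part of the diff), the derived "short" strings are just the 5-token cores of the full prediction strings:

s = '1f689929-883d-4dae-958c-3d57ab5b6c16;;;;;;animal'
print(';'.join(s.split(';')[1:-1]))  # -> ';;;;' (five empty taxonomy tokens)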
@@ -870,6 +999,7 @@ def get_kingdom(prediction_string):
         str: the kingdom field from the input string
     """
     tokens = prediction_string.split(';')
+    assert is_valid_prediction_string(prediction_string)
     return tokens[1]
 
 
@@ -885,6 +1015,19 @@ def is_human_classification(prediction_string):
         bool: whether this string corresponds to a human category
     """
     return prediction_string == human_prediction_string or 'homo;sapiens' in prediction_string
+
+
+def is_vehicle_classification(prediction_string):
+    """
+    Determines whether the input string represents a vehicle classification.
+
+    Args:
+        prediction_string (str): a string in the semicolon-delimited prediction string format
+
+    Returns:
+        bool: whether this string corresponds to the vehicle category
+    """
+    return prediction_string == vehicle_prediction_string
 
 
 def is_animal_classification(prediction_string):
@@ -912,30 +1055,150 @@ def is_animal_classification(prediction_string):
     return True
 
 
-def generate_md_results_from_predictions_json(predictions_json_file,md_results_file,base_folder=None):
+def generate_whole_image_detections_for_classifications(classifications_json_file,
+                                                        detections_json_file,
+                                                        ensemble_json_file=None,
+                                                        ignore_blank_classifications=True):
     """
-    Generate an MD-formatted .json file from a predictions.json file. Typically,
-    MD results files use relative paths, and predictions.json files use absolute paths, so
-    this function optionally removes the leading string [base_folder] from all file names.
+    Given a set of classification results that were likely run on already-cropped
+    images, generate a file of [fake] detections in which each image is covered
+    by a single whole-image detection.
+
+    Args:
+        classifications_json_file (str): SpeciesNet-formatted file containing classifications
+        detections_json_file (str): SpeciesNet-formatted file to write with detections
+        ensemble_json_file (str, optional): SpeciesNet-formatted file to write with detections
+            and classifications
+        ignore_blank_classifications (bool, optional): use non-top classifications when
+            the top classification is "blank" or "no CV result"
+
+    Returns:
+        dict: the contents of [detections_json_file]
+    """
+
+    with open(classifications_json_file,'r') as f:
+        classification_results = json.load(f)
+    predictions = classification_results['predictions']
+
+    output_predictions = []
+    ensemble_predictions = []
+
+    # prediction = predictions[0]
+    for prediction in predictions:
+
+        output_prediction = {}
+        output_prediction['filepath'] = prediction['filepath']
+        i_score = 0
+        if ignore_blank_classifications:
+            while (prediction['classifications']['classes'][i_score] in \
+                   (blank_prediction_string,no_cv_result_prediction_string)):
+                i_score += 1
+        top_classification = prediction['classifications']['classes'][i_score]
+        top_classification_score = prediction['classifications']['scores'][i_score]
+        if is_animal_classification(top_classification):
+            category_name = 'animal'
+        elif is_human_classification(top_classification):
+            category_name = 'human'
+        else:
+            category_name = 'vehicle'
+
+        if category_name == 'human':
+            md_category_name = 'person'
+        else:
+            md_category_name = category_name
+
+        output_detection = {}
+        output_detection['label'] = category_name
+        output_detection['category'] = md_category_name_to_id[md_category_name]
+        output_detection['conf'] = 1.0
+        output_detection['bbox'] = [0.0, 0.0, 1.0, 1.0]
+        output_prediction['detections'] = [output_detection]
+        output_predictions.append(output_prediction)
+
+        ensemble_prediction = {}
+        ensemble_prediction['filepath'] = prediction['filepath']
+        ensemble_prediction['detections'] = [output_detection]
+        ensemble_prediction['prediction'] = top_classification
+        ensemble_prediction['prediction_score'] = top_classification_score
+        ensemble_prediction['prediction_source'] = 'fake_ensemble_file_utility'
+        ensemble_prediction['classifications'] = prediction['classifications']
+        ensemble_predictions.append(ensemble_prediction)
+
+    # ...for each image
+
+    ## Write output
+
+    if ensemble_json_file is not None:
+
+        ensemble_output_data = {'predictions':ensemble_predictions}
+        with open(ensemble_json_file,'w') as f:
+            json.dump(ensemble_output_data,f,indent=1)
+        _ = validate_predictions_file(ensemble_json_file)
+
+    output_data = {'predictions':output_predictions}
+    with open(detections_json_file,'w') as f:
+        json.dump(output_data,f,indent=1)
+    return validate_predictions_file(detections_json_file)
+
+# ...def generate_whole_image_detections_for_classifications(...)
+
+
+def generate_md_results_from_predictions_json(predictions_json_file,
+                                              md_results_file,
+                                              base_folder=None,
+                                              max_decimals=5,
+                                              convert_human_to_person=True):
+    """
+    Generate an MD-formatted .json file from a predictions.json file, generated by the
+    SpeciesNet ensemble. Typically, MD results files use relative paths, and predictions.json
+    files use absolute paths, so this function optionally removes the leading string
+    [base_folder] from all file names.
+
+    Currently just applies the top classification category to every detection. If the top
+    classification is "blank", writes an empty detection list.
+
+    Uses the classification from the "prediction" field if it's available, otherwise
+    uses the "classifications" field.
 
-    Currently just applies the top classification category to every detection. If the top classification
-    is "blank", writes an empty detection list.
+    When using the "prediction" field, records the top class in the "classifications" field to
+    a field in each image called "top_classification_common_name". This is often different
+    from the value of the "prediction" field.
 
-    wi_to_md.py is a command-line driver for this function.
+    speciesnet_to_md.py is a command-line driver for this function.
 
     Args:
-        predictions_json_file (str): path to a predictions.json file
+        predictions_json_file (str): path to a predictions.json file, or a dict
         md_results_file (str): path to which we should write an MD-formatted .json file
-        base_folder (str, optional): leading string to remove from each path in the predictions.json file
+        base_folder (str, optional): leading string to remove from each path in the
+            predictions.json file
+        max_decimals (int, optional): number of decimal places to which we should round
+            all values
+        convert_human_to_person (bool, optional): WI predictions.json files sometimes use the
+            detection category "human"; MD files usually use "person". If True, switches "human"
+            to "person".
     """
 
     # Read predictions file
-    with open(predictions_json_file,'r') as f:
-        predictions = json.load(f)
+    if isinstance(predictions_json_file,str):
+        with open(predictions_json_file,'r') as f:
+            predictions = json.load(f)
+    else:
+        assert isinstance(predictions_json_file,dict)
+        predictions = predictions_json_file
+
+    # Round floating-point values (confidence scores, coordinates) to a
+    # reasonable number of decimal places
+    if max_decimals is not None and max_decimals > 0:
+        round_floats_in_nested_dict(predictions)
+
     predictions = predictions['predictions']
     assert isinstance(predictions,list)
 
-    from megadetector.utils.ct_utils import is_list_sorted
+    # Convert backslashes to forward slashes in both filenames and the base folder string
+    for im in predictions:
+        im['filepath'] = im['filepath'].replace('\\','/')
+    if base_folder is not None:
+        base_folder = base_folder.replace('\\','/')
 
     detection_category_id_to_name = {}
     classification_category_name_to_id = {}
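
A minimal usage sketch for the new whole-image-detection utility (illustrative, not part of the diff; file names are hypothetical):

from megadetector.utils.wi_utils import generate_whole_image_detections_for_classifications

# Wrap classifier-only results in full-image pseudo-detections, and also
# write a fake ensemble file that downstream tools can consume
detections = generate_whole_image_detections_for_classifications(
    classifications_json_file='classifier_output.json',
    detections_json_file='fake_detections.json',
    ensemble_json_file='fake_ensemble.json',
    ignore_blank_classifications=True)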
@@ -948,6 +1211,8 @@ def generate_md_results_from_predictions_json(predictions_json_file,md_results_f
     # Create the output images list
     images_out = []
 
+    base_folder_replacements = 0
+
     # im_in = predictions[0]
     for im_in in predictions:
 
@@ -957,6 +1222,7 @@ def generate_md_results_from_predictions_json(predictions_json_file,md_results_f
         fn = im_in['filepath']
         if base_folder is not None:
             if fn.startswith(base_folder):
+                base_folder_replacements += 1
                 fn = fn.replace(base_folder,'',1)
 
         im_out['file'] = fn
@@ -990,7 +1256,8 @@ def generate_md_results_from_predictions_json(predictions_json_file,md_results_f
         # ...if detections are present
 
         class_to_assign = None
-        class_confidence = None
+        class_confidence = None
+        top_classification_common_name = None
 
         if 'classifications' in im_in:
 
@@ -1000,8 +1267,15 @@ def generate_md_results_from_predictions_json(predictions_json_file,md_results_f
             class_to_assign = classifications['classes'][0]
             class_confidence = classifications['scores'][0]
 
+            tokens = class_to_assign.split(';')
+            assert len(tokens) == 7
+            top_classification_common_name = tokens[-1]
+            if len(top_classification_common_name) == 0:
+                top_classification_common_name = 'undefined'
+
         if 'prediction' in im_in:
 
+            im_out['top_classification_common_name'] = top_classification_common_name
             class_to_assign = im_in['prediction']
             class_confidence = im_in['prediction_score']
 
@@ -1056,8 +1330,12 @@
 
     # ...for each image
 
-    # Fix the 'unknown' category
-
+    if base_folder is not None:
+        if base_folder_replacements == 0:
+            print('Warning: you supplied {} as the base folder, but I made zero replacements'.format(
+                base_folder))
+
+    # Fix the 'unknown' category
     if len(all_unknown_detections) > 0:
 
         max_detection_category_id = max([int(x) for x in detection_category_id_to_name.keys()])
@@ -1075,7 +1353,8 @@
 
     # Prepare friendly classification names
 
-    classification_category_descriptions = invert_dictionary(classification_category_name_to_id)
+    classification_category_descriptions = \
+        invert_dictionary(classification_category_name_to_id)
     classification_categories_out = {}
     for category_id in classification_category_descriptions.keys():
         category_name = classification_category_descriptions[category_id].split(';')[-1]
@@ -1088,6 +1367,11 @@
     info['format_version'] = 1.4
     info['detector'] = 'converted_from_predictions_json'
 
+    if convert_human_to_person:
+        for k in detection_categories_out.keys():
+            if detection_categories_out[k] == 'human':
+                detection_categories_out[k] = 'person'
+
     output_dict = {}
     output_dict['info'] = info
     output_dict['detection_categories'] = detection_categories_out
@@ -1105,7 +1389,9 @@
 # ...def generate_md_results_from_predictions_json(...)
 
 
-def generate_predictions_json_from_md_results(md_results_file,predictions_json_file,base_folder=None):
+def generate_predictions_json_from_md_results(md_results_file,
+                                              predictions_json_file,
+                                              base_folder=None):
     """
     Generate a predictions.json file from the MD-formatted .json file [md_results_file]. Typically,
     MD results files use relative paths, and predictions.json files use absolute paths, so
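
Based on the revised signature, a typical conversion call looks like this (illustrative, not part of the diff; paths are hypothetical):

from megadetector.utils.wi_utils import generate_md_results_from_predictions_json

generate_md_results_from_predictions_json(
    predictions_json_file='predictions.json',  # or an already-loaded dict
    md_results_file='md_results.json',
    base_folder='/data/camera-traps/',         # leading string stripped from absolute paths
    max_decimals=5,
    convert_human_to_person=True)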
@@ -1166,12 +1452,16 @@
 # ...def generate_predictions_json_from_md_results(...)
 
 
+default_tokens_to_ignore = ['$RECYCLE.BIN']
+
 def generate_instances_json_from_folder(folder,
                                         country=None,
+                                        admin1_region=None,
                                         lat=None,
                                         lon=None,
                                         output_file=None,
-                                        filename_replacements=None):
+                                        filename_replacements=None,
+                                        tokens_to_ignore=default_tokens_to_ignore):
     """
     Generate an instances.json record that contains all images in [folder], optionally
     including location information, in a format suitable for run_model.py. Optionally writes
@@ -1186,6 +1476,8 @@ def generate_instances_json_from_folder(folder,
         filename_replacements (dict, optional): str --> str dict indicating filename substrings
             that should be replaced with other strings. Replacement occurs *after* converting
             backslashes to forward slashes.
+        tokens_to_ignore (list, optional): ignore any images with these tokens in their
+            names, typically used to avoid $RECYCLE.BIN. Can be None.
 
     Returns:
         dict: dict with at least the field "instances"
@@ -1195,6 +1487,13 @@
 
     image_files_abs = find_images(folder,recursive=True,return_relative_paths=False)
 
+    if tokens_to_ignore is not None:
+        n_images_before_ignore_tokens = len(image_files_abs)
+        for token in tokens_to_ignore:
+            image_files_abs = [fn for fn in image_files_abs if token not in fn]
+        print('After ignoring {} tokens, kept {} of {} images'.format(
+            len(tokens_to_ignore),len(image_files_abs),n_images_before_ignore_tokens))
+
     instances = []
 
     # image_fn_abs = image_files_abs[0]
@@ -1206,6 +1505,8 @@
                 instance['filepath'] = instance['filepath'].replace(s,filename_replacements[s])
         if country is not None:
             instance['country'] = country
+        if admin1_region is not None:
+            instance['admin1_region'] = admin1_region
         if lat is not None:
            assert lon is not None, 'Latitude provided without longitude'
            instance['latitude'] = lat
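
A usage sketch for the extended instance generation (illustrative, not part of the diff; the folder and country code are hypothetical):

from megadetector.utils.wi_utils import generate_instances_json_from_folder

instances_dict = generate_instances_json_from_folder(
    folder='/data/namibia-cams',
    country='NAM',
    output_file='instances.json',
    tokens_to_ignore=['$RECYCLE.BIN'])
print('Generated {} instances'.format(len(instances_dict['instances'])))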
@@ -1226,14 +1527,286 @@
 # ...def generate_instances_json_from_folder(...)
 
 
-#%% Functions related to geofencing and taxonomy mapping
+def split_instances_into_n_batches(instances_json,n_batches,output_files=None):
+    """
+    Given an instances.json file, split it into batches of equal size.
+
+    Args:
+        instances_json (str): input .json file in instances.json format
+        n_batches (int): number of new files to generate
+        output_files (list, optional): output .json files for each
+            batch. If supplied, should have length [n_batches]. If not
+            supplied, filenames will be generated based on [instances_json].
+
+    Returns:
+        list: list of output files that were written; identical to [output_files]
+        if it was supplied as input.
+    """
+
+    with open(instances_json,'r') as f:
+        instances = json.load(f)
+    assert isinstance(instances,dict) and 'instances' in instances
+    instances = instances['instances']
+
+    if output_files is not None:
+        assert len(output_files) == n_batches, \
+            'Expected {} output files, received {}'.format(
+                n_batches,len(output_files))
+    else:
+        output_files = []
+        for i_batch in range(0,n_batches):
+            batch_string = 'batch_{}'.format(str(i_batch).zfill(3))
+            output_files.append(insert_before_extension(instances_json,batch_string))
+
+    batches = split_list_into_n_chunks(instances, n_batches)
+
+    for i_batch,batch in enumerate(batches):
+        batch_dict = {'instances':batch}
+        with open(output_files[i_batch],'w') as f:
+            json.dump(batch_dict,f,indent=1)
+
+    print('Wrote {} batches to file'.format(n_batches))
+
+    return output_files
+
+
+def merge_prediction_json_files(input_prediction_files,output_prediction_file):
+    """
+    Merge all predictions.json files in [input_prediction_files] into a single .json file.
+
+    Args:
+        input_prediction_files (list): list of predictions.json files to merge
+        output_prediction_file (str): output .json file
+    """
+
+    predictions = []
+    image_filenames_processed = set()
+
+    # input_json_fn = input_prediction_files[0]
+    for input_json_fn in tqdm(input_prediction_files):
+
+        assert os.path.isfile(input_json_fn), \
+            'Could not find prediction file {}'.format(input_json_fn)
+        with open(input_json_fn,'r') as f:
+            results_this_file = json.load(f)
+        assert isinstance(results_this_file,dict)
+        predictions_this_file = results_this_file['predictions']
+        for prediction in predictions_this_file:
+            image_fn = prediction['filepath']
+            assert image_fn not in image_filenames_processed
+        predictions.extend(predictions_this_file)
+
+    output_dict = {'predictions':predictions}
+
+    os.makedirs(os.path.dirname(output_prediction_file),exist_ok=True)
+    with open(output_prediction_file,'w') as f:
+        json.dump(output_dict,f,indent=1)
+
+# ...def merge_prediction_json_files(...)
+
+
+def validate_predictions_file(fn,instances=None,verbose=True):
+    """
+    Validate the predictions.json file [fn].
+
+    Args:
+        fn (str): a .json file in predictions.json (SpeciesNet) format
+        instances (str or list, optional): a folder, instances.json file,
+            or dict loaded from an instances.json file. If supplied, this
+            function will verify that [fn] contains the same number of
+            images as [instances].
+        verbose (bool, optional): enable additional debug output
+
+    Returns:
+        dict: the contents of [fn]
+    """
+
+    with open(fn,'r') as f:
+        d = json.load(f)
+    predictions = d['predictions']
+
+    failures = []
+
+    for im in predictions:
+        if 'failures' in im:
+            failures.append(im)
+
+    if verbose:
+        print('Read predictions for {} images, with {} failure(s)'.format(
+            len(d['predictions']),len(failures)))
+
+    if instances is not None:
+
+        if isinstance(instances,str):
+            if os.path.isdir(instances):
+                instances = generate_instances_json_from_folder(folder=instances)
+            elif os.path.isfile(instances):
+                with open(instances,'r') as f:
+                    instances = json.load(f)
+            else:
+                raise ValueError('Could not find instances file/folder {}'.format(
+                    instances))
+        assert isinstance(instances,dict)
+        assert 'instances' in instances
+        instances = instances['instances']
+        if verbose:
+            print('Expected results for {} files'.format(len(instances)))
+        assert len(instances) == len(predictions), \
+            '{} instances expected, {} found'.format(
+                len(instances),len(predictions))
+
+        expected_files = set([instance['filepath'] for instance in instances])
+        found_files = set([prediction['filepath'] for prediction in predictions])
+        assert expected_files == found_files
+
+    # ...if a list of instances was supplied
+
+    return d
+
+# ...def validate_predictions_file(...)
+
+
+def find_geofence_adjustments(ensemble_json_file,use_latin_names=False):
+    """
+    Count the number of instances of each unique change made by the geofence.
+
+    Args:
+        ensemble_json_file (str): SpeciesNet-formatted .json file produced
+            by the full ensemble.
+        use_latin_names (bool, optional): return a mapping using binomial names
+            rather than common names.
+
+    Returns:
+        dict: maps strings that look like "puma,felidae family" to integers,
+        where that entry would indicate the number of times that "puma" was
+        predicted, but mapped to family level by the geofence. Sorted in
+        descending order by count.
+    """
+
+    # Load and validate ensemble results
+    ensemble_results = validate_predictions_file(ensemble_json_file)
+
+    assert isinstance(ensemble_results,dict)
+    predictions = ensemble_results['predictions']
+
+    # Maps comma-separated pairs of common names (or binomial names) to
+    # the number of times that transition (first --> second) happened
+    rollup_pair_to_count = defaultdict(int)
+
+    # prediction = predictions[0]
+    for prediction in tqdm(predictions):
+
+        if 'failures' in prediction and \
+            prediction['failures'] is not None and \
+            len(prediction['failures']) > 0:
+            continue
+
+        assert 'prediction_source' in prediction, \
+            'Prediction present without [prediction_source] field, are you sure this ' + \
+            'is an ensemble output file?'
+
+        if 'geofence' in prediction['prediction_source']:
+
+            classification_taxonomy_string = \
+                prediction['classifications']['classes'][0]
+            prediction_taxonomy_string = prediction['prediction']
+            assert is_valid_prediction_string(classification_taxonomy_string)
+            assert is_valid_prediction_string(prediction_taxonomy_string)
+
+            # Typical examples:
+            # '86f5b978-4f30-40cc-bd08-be9e3fba27a0;mammalia;rodentia;sciuridae;sciurus;carolinensis;eastern gray squirrel'
+            # 'e4d1e892-0e4b-475a-a8ac-b5c3502e0d55;mammalia;rodentia;sciuridae;;;sciuridae family'
+            classification_common_name = classification_taxonomy_string.split(';')[-1]
+            prediction_common_name = prediction_taxonomy_string.split(';')[-1]
+            classification_binomial_name = classification_taxonomy_string.split(';')[-2]
+            prediction_binomial_name = prediction_taxonomy_string.split(';')[-2]
+
+            input_name = classification_binomial_name if use_latin_names else \
+                classification_common_name
+            output_name = prediction_binomial_name if use_latin_names else \
+                prediction_common_name
+
+            rollup_pair = input_name.strip() + ',' + output_name.strip()
+            rollup_pair_to_count[rollup_pair] += 1
+
+        # ...if we made a geofencing change
+
+    # ...for each prediction
+
+    rollup_pair_to_count = sort_dictionary_by_value(rollup_pair_to_count,reverse=True)
+
+    return rollup_pair_to_count
+
+# ...def find_geofence_adjustments(...)
+
+
+def generate_geofence_adjustment_html_summary(rollup_pair_to_count,min_count=10):
+    """
+    Given a list of geofence rollups, likely generated by find_geofence_adjustments,
+    generate an HTML summary of the changes made by geofencing. The resulting HTML
+    is wrapped in <div>, but not, for example, in <html> or <body>.
+
+    Args:
+        rollup_pair_to_count (dict): list of changes made by geofencing, see
+            find_geofence_adjustments for details
+        min_count (int, optional): minimum number of changes a pair needs in order
+            to be included in the report.
+    """
+
+    geofence_footer = ''
+
+    # Restrict to the list of taxa that were impacted by geofencing
+    rollup_pair_to_count = \
+        {key: value for key, value in rollup_pair_to_count.items() if value >= min_count}
+
+    # rollup_pair_to_count is sorted in descending order by count
+    assert is_list_sorted(list(rollup_pair_to_count.values()),reverse=True)
+
+    if len(rollup_pair_to_count) > 0:
+
+        geofence_footer = \
+            '<h3>Geofence changes that occurred more than {} times</h3>\n'.format(min_count)
+        geofence_footer += '<div class="contentdiv">\n'
+
+        print('\nRollup changes with count > {}:'.format(min_count))
+        for rollup_pair in rollup_pair_to_count.keys():
+            count = rollup_pair_to_count[rollup_pair]
+            rollup_pair_s = rollup_pair.replace(',',' --> ')
+            print('{}: {}'.format(rollup_pair_s,count))
+            rollup_pair_html = rollup_pair.replace(',',' &rarr; ')
+            geofence_footer += '{} ({})<br/>\n'.format(rollup_pair_html,count)
+
+        geofence_footer += '</div>\n'
+
+    return geofence_footer
+
+# ...def generate_geofence_adjustment_html_summary(...)
+
+
+#%% Module-level globals related to taxonomy mapping and geofencing
 
 # This maps a taxonomy string (e.g. mammalia;cetartiodactyla;cervidae;odocoileus;virginianus) to
 # a dict with keys taxon_id, common_name, kingdom, phylum, class, order, family, genus, species
 taxonomy_string_to_taxonomy_info = None
+
+# Maps a binomial name (one, two, or three ws-delimited tokens) to the same dict described above.
 binomial_name_to_taxonomy_info = None
+
+# Maps a common name to the same dict described above
 common_name_to_taxonomy_info = None
 
+# Dict mapping 5-token semicolon-delimited taxonomy strings to geofencing rules
+taxonomy_string_to_geofencing_rules = None
+
+# Maps lower-case country names to upper-case country codes
+country_to_country_code = None
+
+# Maps upper-case country codes to lower-case country names
+country_code_to_country = None
+
+
+#%% Functions related to geofencing and taxonomy mapping
+
 def taxonomy_info_to_taxonomy_string(taxonomy_info):
     """
     Convert a taxonomy record in dict format to a semicolon-delimited string
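
Taken together, the new batching, merging, and validation helpers support a simple fan-out/fan-in workflow; a sketch (illustrative, not part of the diff; paths and the batch count are hypothetical):

from megadetector.utils.wi_utils import (split_instances_into_n_batches,
                                         merge_prediction_json_files,
                                         validate_predictions_file)

# Fan out: split instances.json into four batches to run separately
batch_files = split_instances_into_n_batches('instances.json', n_batches=4)

# ...run SpeciesNet on each batch, writing one predictions file per batch...

# Fan in: merge the per-batch predictions and validate against the original instances
merge_prediction_json_files(['out/predictions_batch_{}.json'.format(str(i).zfill(3))
                             for i in range(4)],
                            'out/predictions_merged.json')
validate_predictions_file('out/predictions_merged.json', instances='instances.json')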
@@ -1258,12 +1831,16 @@ def initialize_taxonomy_info(taxonomy_file,force_init=False,encoding='cp1252'):
     [common_name_to_taxonomy_info].
 
     Args:
-        taxonomy_file (str): .json file containing WI taxonomy information
+        taxonomy_file (str): .json file containing mappings from the short taxonomy strings
+            to the longer strings with GUID and common name, see example below.
         force_init (bool, optional): if the output dicts already exist, should we
             re-initialize anyway?
         encoding (str, optional): character encoding to use when opening the .json file
     """
 
+    if encoding is None:
+        encoding = 'cp1252'
+
     global taxonomy_string_to_taxonomy_info
     global binomial_name_to_taxonomy_info
     global common_name_to_taxonomy_info
@@ -1322,26 +1899,174 @@ def initialize_taxonomy_info(taxonomy_file,force_init=False,encoding='cp1252'):
             common_name_to_taxonomy_info[taxon_info['common_name']] = taxon_info
 
         taxonomy_string_to_taxonomy_info[taxonomy_string] = taxon_info
-        if tokens[4] == '' or tokens[5] == '':
+
+        binomial_name = None
+        if len(tokens[4]) > 0 and len(tokens[5]) > 0:
+            # strip(), but don't remove spaces from the species name;
+            # subspecies are separated with a space, e.g. canis;lupus dingo
+            binomial_name = tokens[4].strip() + ' ' + tokens[5].strip()
+        elif len(tokens[4]) > 0:
+            binomial_name = tokens[4].strip()
+        elif len(tokens[3]) > 0:
+            binomial_name = tokens[3].strip()
+        elif len(tokens[2]) > 0:
+            binomial_name = tokens[2].strip()
+        elif len(tokens[1]) > 0:
+            binomial_name = tokens[1].strip()
+        if binomial_name is None:
             # print('Warning: no binomial name for {}'.format(taxonomy_string))
             pass
         else:
-            binomial_name = tokens[4].strip() + ' ' + tokens[5].strip()
             binomial_name_to_taxonomy_info[binomial_name] = taxon_info
+
+    print('Created {} records in taxonomy_string_to_taxonomy_info'.format(len(taxonomy_string_to_taxonomy_info)))
+    print('Created {} records in common_name_to_taxonomy_info'.format(len(common_name_to_taxonomy_info)))
 
 # ...def initialize_taxonomy_info(...)
 
 
-#%% Geofencing functions
-
-# Dict mapping semicolon-delimited taxonomy strings to geofencing rules
-taxonomy_string_to_geofencing_rules = None
+def _parse_code_list(codes):
+    """
+    Turn a list of country or state codes in string, delimited string, or list format
+    into a list. Also does basic validity checking.
+    """
+
+    if not isinstance(codes,list):
+
+        assert isinstance(codes,str)
+
+        codes = codes.strip()
+
+        # This is just a single code
+        if ',' not in codes:
+            codes = [codes]
+        else:
+            codes = codes.split(',')
+            codes = [c.strip() for c in codes]
+
+    assert isinstance(codes,list)
+
+    codes = [c.upper().strip() for c in codes]
+
+    for c in codes:
+        assert len(c) in (2,3)
+
+    return codes
+
+
+def _generate_csv_rows_to_block_all_countries_except(
+        species_string,
+        block_except_list):
+    """
+    Generate rows in the format expected by geofence_fixes.csv, representing a list of
+    allow and block rules to block all countries currently allowed for this species
+    except [block_except_list], and add allow rules for those countries.
+    """
+
+    assert is_valid_taxonomy_string(species_string), \
+        '{} is not a valid taxonomy string'.format(species_string)
+
+    global taxonomy_string_to_taxonomy_info
+    global binomial_name_to_taxonomy_info
+    global common_name_to_taxonomy_info
+
+    assert taxonomy_string_to_geofencing_rules is not None, \
+        'Initialize geofencing prior to species lookup'
+    assert taxonomy_string_to_taxonomy_info is not None, \
+        'Initialize taxonomy lookup prior to species lookup'
+
+    geofencing_rules_this_species = \
+        taxonomy_string_to_geofencing_rules[species_string]
+
+    allowed_countries = []
+    if 'allow' in geofencing_rules_this_species:
+        allowed_countries.extend(geofencing_rules_this_species['allow'])
+
+    blocked_countries = []
+    if 'block' in geofencing_rules_this_species:
+        blocked_countries.extend(geofencing_rules_this_species['block'])
+
+    block_except_list = _parse_code_list(block_except_list)
+
+    countries_to_block = []
+    countries_to_allow = []
+
+    # country = allowed_countries[0]
+    for country in allowed_countries:
+        if country not in block_except_list and country not in blocked_countries:
+            countries_to_block.append(country)
+
+    for country in block_except_list:
+        if country in blocked_countries:
+            raise ValueError("I can't allow a country that has already been blocked")
+        if country not in allowed_countries:
+            countries_to_allow.append(country)
+
+    rows = generate_csv_rows_for_species(species_string,
+                                         allow_countries=countries_to_allow,
+                                         block_countries=countries_to_block)
+
+    return rows
+
+# ...def _generate_csv_rows_to_block_all_countries_except(...)
+
+
+def generate_csv_rows_for_species(species_string,
+                                  allow_countries=None,
+                                  block_countries=None,
+                                  allow_states=None,
+                                  block_states=None,
+                                  blockexcept_countries=None):
+    """
+    Generate rows in the format expected by geofence_fixes.csv, representing a list of
+    allow and/or block rules for the specified species and countries/states. Does not check
+    that the rules make sense; e.g. nothing will stop you in this function from both allowing
+    and blocking a country.
+
+    Args:
+        species_string (str): five-token string in semicolon-delimited WI taxonomy format
+        allow_countries (optional, list or str): three-letter country code, list of
+            country codes, or comma-separated list of country codes to allow
+        block_countries (optional, list or str): three-letter country code, list of
+            country codes, or comma-separated list of country codes to block
+        allow_states (optional, list or str): two-letter state code, list of
+            state codes, or comma-separated list of state codes to allow
+        block_states (optional, list or str): two-letter state code, list of
+            state codes, or comma-separated list of state codes to block
+
+    Returns:
+        list of str: lines ready to be pasted into geofence_fixes.csv
+    """
+
+    assert is_valid_taxonomy_string(species_string), \
+        '{} is not a valid taxonomy string'.format(species_string)
+
+    lines = []
+
+    if allow_countries is not None:
+        allow_countries = _parse_code_list(allow_countries)
+        for country in allow_countries:
+            lines.append(species_string + ',allow,' + country + ',')
+
+    if block_countries is not None:
+        block_countries = _parse_code_list(block_countries)
+        for country in block_countries:
+            lines.append(species_string + ',block,' + country + ',')
+
+    if allow_states is not None:
+        allow_states = _parse_code_list(allow_states)
+        for state in allow_states:
+            lines.append(species_string + ',allow,USA,' + state)
+
+    if block_states is not None:
+        block_states = _parse_code_list(block_states)
+        for state in block_states:
+            lines.append(species_string + ',block,USA,' + state)
+
+    return lines
 
-# Maps lower-case country names to upper-case country codes
-country_to_country_code = None
+# ...def generate_csv_rows_for_species(...)
 
-# Maps upper-case country codes to lower-case country names
-country_code_to_country = None
 
 
 def initialize_geofencing(geofencing_file,country_code_file,force_init=False):
@@ -1351,10 +2076,13 @@ def initialize_geofencing(geofencing_file,country_code_file,force_init=False):
 
     Args:
         geofencing_file (str): .json file with geofencing rules
-        country_code_file (str): .csv file with country code mappings
+        country_code_file (str): .csv file with country code mappings, in columns
+            called "name" and "alpha-3", e.g. from
+            https://github.com/lukes/ISO-3166-Countries-with-Regional-Codes/blob/master/all/all.csv
         force_init (bool, optional): if the output dicts already exist, should we
             re-initialize anyway?
     """
+
     global taxonomy_string_to_geofencing_rules
     global country_to_country_code
     global country_code_to_country
@@ -1404,29 +2132,63 @@ def initialize_geofencing(geofencing_file,country_code_file,force_init=False):
 
         species_rules = taxonomy_string_to_geofencing_rules[species_string]
 
-        # Every country should *either* have allow rules or block rules, no countries
-        # currently have both
-        assert len(species_rules.keys()) == 1
-        rule_type = list(species_rules.keys())[0]
-        assert rule_type in ('allow','block')
-
-        all_country_rules_this_species = species_rules[rule_type]
-        for country_code in all_country_rules_this_species.keys():
-
-            assert country_code in country_code_to_country
+        if len(species_rules.keys()) > 1:
+            print('Warning: taxon {} has both allow and block rules'.format(species_string))
 
-            region_rules = all_country_rules_this_species[country_code]
+        for rule_type in species_rules.keys():
 
-            # Right now we only have regional rules for the USA; these may be part of
-            # allow or block rules.
-            if len(region_rules) > 0:
-                assert country_code == 'USA'
+            assert rule_type in ('allow','block')
+            all_country_rules_this_species = species_rules[rule_type]
+
+            for country_code in all_country_rules_this_species.keys():
+                assert country_code in country_code_to_country
+                region_rules = all_country_rules_this_species[country_code]
+                # Right now we only have regional rules for the USA; these may be part of
+                # allow or block rules.
+                if len(region_rules) > 0:
+                    assert country_code == 'USA'
 
     # ...for each species
 
 # ...def initialize_geofencing(...)
 
 
+def _species_string_to_canonical_species_string(species):
+    """
+    Convert a string that may be a 5-token species string, a binomial name,
+    or a common name into a 5-token species string, using taxonomic lookup.
+    """
+
+    global taxonomy_string_to_taxonomy_info
+    global binomial_name_to_taxonomy_info
+    global common_name_to_taxonomy_info
+
+    assert taxonomy_string_to_geofencing_rules is not None, \
+        'Initialize geofencing prior to species lookup'
+    assert taxonomy_string_to_taxonomy_info is not None, \
+        'Initialize taxonomy lookup prior to species lookup'
+
+    species = species.lower()
+
+    # Turn "species" into a taxonomy string
+
+    # If this is already a taxonomy string...
+    if len(species.split(';')) == 5:
+        taxonomy_string = species
+    # If this is a common name...
+    elif species in common_name_to_taxonomy_info:
+        taxonomy_info = common_name_to_taxonomy_info[species]
+        taxonomy_string = taxonomy_info_to_taxonomy_string(taxonomy_info)
+    # If this is a binomial name...
+    elif (species in binomial_name_to_taxonomy_info):
+        taxonomy_info = binomial_name_to_taxonomy_info[species]
+        taxonomy_string = taxonomy_info_to_taxonomy_string(taxonomy_info)
+    else:
+        raise ValueError('Could not find taxonomic information for {}'.format(species))
+
+    return taxonomy_string
+
+
 def species_allowed_in_country(species,country,state=None,return_status=False):
     """
     Determines whether [species] is allowed in [country], according to
@@ -1445,35 +2207,16 @@ def species_allowed_in_country(species,country,state=None,return_status=False):
         False. Returns a more detailed string if return_status is set.
     """
 
+    global taxonomy_string_to_taxonomy_info
+    global binomial_name_to_taxonomy_info
+    global common_name_to_taxonomy_info
+
     assert taxonomy_string_to_geofencing_rules is not None, \
         'Initialize geofencing prior to species lookup'
     assert taxonomy_string_to_taxonomy_info is not None, \
         'Initialize taxonomy lookup prior to species lookup'
 
-    # species = 'mammalia;cetartiodactyla;cervidae;odocoileus;virginianus'
-    # species = 'didelphis marsupialis'
-    # country = 'Guatemala'
-
-    # species = 'common opossum'
-
-    species = species.lower()
-
-    # Turn "species" into a taxonomy string
-
-    # If this is already a taxonomy string...
-    if len(species.split(';')) == 5:
-        pass
-    # If this is a binomial name...
-    elif len(species.split(' ')) == 2 and (species in binomial_name_to_taxonomy_info):
-        taxonomy_info = binomial_name_to_taxonomy_info[species]
-        taxonomy_string = taxonomy_info_to_taxonomy_string(taxonomy_info)
-    # If this is a common name...
-    elif species in common_name_to_taxonomy_info:
-        taxonomy_info = common_name_to_taxonomy_info[species]
-        taxonomy_string = taxonomy_info_to_taxonomy_string(taxonomy_info)
-    else:
-        raise ValueError('Could not find taxonomic information for {}'.format(species))
-
+    taxonomy_string = _species_string_to_canonical_species_string(species)
 
     # Normalize [state]
 
@@ -1504,27 +2247,34 @@ def species_allowed_in_country(species,country,state=None,return_status=False):
     allowed_countries = []
     blocked_countries = []
 
-    assert len(geofencing_rules_this_species.keys()) == 1
-    rule_type = list(geofencing_rules_this_species.keys())[0]
-    assert rule_type in ('allow','block')
+    rule_types_this_species = list(geofencing_rules_this_species.keys())
+    for rule_type in rule_types_this_species:
+        assert rule_type in ('allow','block')
 
-    if rule_type == 'allow':
-        allowed_countries = list(geofencing_rules_this_species['allow'])
-    else:
-        assert rule_type == 'block'
+    if 'block' in rule_types_this_species:
         blocked_countries = list(geofencing_rules_this_species['block'])
+    if 'allow' in rule_types_this_species:
+        allowed_countries = list(geofencing_rules_this_species['allow'])
 
     status = None
+
+    # The convention is that block rules win over allow rules
     if country_code in blocked_countries:
-        status = 'blocked'
+        if country_code in allowed_countries:
+            status = 'blocked_over_allow'
+        else:
+            status = 'blocked'
     elif country_code in allowed_countries:
         status = 'allowed'
-    else:
+    elif len(allowed_countries) > 0:
         # The convention is that if allow rules exist, any country not on that list
         # is blocked.
-        assert len(allowed_countries) > 0
-        return 'not_on_country_allow_list'
-
+        status = 'block_not_on_country_allow_list'
+    else:
+        # Only block rules exist for this species, and they don't include this country
+        assert len(blocked_countries) > 0
+        status = 'allow_not_on_block_list'
+
     # Now let's see whether we have to deal with any regional rules
     if state is None:
 
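Under the revised rule handling, a lookup might look like this (illustrative, not part of the diff; assumes initialize_taxonomy_info and initialize_geofencing have already been called with the appropriate data files):

status = species_allowed_in_country('puma concolor', 'USA', state='WA',
                                    return_status=True)
# Possible status values now include 'allowed', 'blocked', 'blocked_over_allow',
# 'block_not_on_country_allow_list', and 'allow_not_on_block_list'
print(status)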
@@ -1565,71 +2315,478 @@ def species_allowed_in_country(species,country,state=None,return_status=False):
1565
2315
  # ...def species_allowed_in_country(...)
1566
2316
 
1567
2317
 
1568
- #%% Interactive driver(s)
2318
+ def restrict_to_taxa_list(taxa_list,
2319
+ speciesnet_taxonomy_file,
2320
+ input_file,
2321
+ output_file,
2322
+ allow_walk_down=False):
2323
+ """
2324
+ Given a prediction file in MD .json format, likely without having had
2325
+ a geofence applied, apply a custom taxa list.
2326
+
2327
+ Args:
2328
+ taxa_list (str or list): list of latin names, or a text file containing
2329
+ a list of latin names. Optionally may contain a second (comma-delimited)
2330
+ column containing common names, used only for debugging. Latin names
2331
+ must exist in the SpeciesNet taxonomy.
2332
+ taxonomy_file (str): taxonomy filename, in the same format used for model
2333
+ release (with 7-token taxonomy entries)
2334
+ output_file (str): .json file to write, in MD format
2335
+ allow_walk_down (bool, optional): should we walk down the taxonomy tree
2336
+ when making mappings if a parent has only a single allowable child?
2337
+ For example, if only a single felid species is allowed, should other
2338
+ felid predictions be mapped to that species, as opposed to being mapped
2339
+ to the family?
2340
+ """
1569
2341
 
1570
- if False:
2342
+ ##%% Read target taxa list
1571
2343
 
1572
- pass
2344
+ if isinstance(taxa_list,str):
2345
+ assert os.path.isfile(taxa_list), \
2346
+ 'Could not find taxa list file {}'.format(taxa_list)
2347
+ with open(taxa_list,'r') as f:
2348
+ taxa_list = f.readlines()
2349
+
2350
+ taxa_list = [s.strip().lower() for s in taxa_list]
2351
+ taxa_list = [s for s in taxa_list if len(s) > 0]
2352
+
2353
+ target_latin_to_common = {}
2354
+ for s in taxa_list:
2355
+ if s.strip().startswith('#'):
2356
+ continue
2357
+ tokens = s.split(',')
2358
+ assert len(tokens) <= 2
2359
+ binomial_name = tokens[0]
2360
+ assert len(binomial_name.split(' ')) in (1,2,3), \
2361
+ 'Illegal binomial name in species list: {}'.format(binomial_name)
2362
+ if len(tokens) > 0:
2363
+ common_name = tokens[1].strip().lower()
2364
+ else:
2365
+ common_name = None
2366
+ assert binomial_name not in target_latin_to_common
2367
+ target_latin_to_common[binomial_name] = common_name
2368
+
2369
+
2370
+ ##%% Read taxonomy file
2371
+
2372
+ with open(speciesnet_taxonomy_file,'r') as f:
2373
+ speciesnet_taxonomy_list = f.readlines()
2374
+ speciesnet_taxonomy_list = [s.strip() for s in \
2375
+ speciesnet_taxonomy_list if len(s.strip()) > 0]
2376
+
2377
+ # Maps the latin name of every taxon to the corresponding full taxon string
2378
+ #
2379
+ # For species, the key is a binomial name
2380
+ speciesnet_latin_name_to_taxon_string = {}
2381
+ speciesnet_common_name_to_taxon_string = {}
2382
+
2383
+ def _insert_taxonomy_string(s):
2384
+
2385
+ tokens = s.split(';')
2386
+ assert len(tokens) == 7
2387
+
2388
+ guid = tokens[0] # noqa
2389
+ class_name = tokens[1]
2390
+ order = tokens[2]
2391
+ family = tokens[3]
2392
+ genus = tokens[4]
2393
+ species = tokens[5]
2394
+ common_name = tokens[6]
2395
+
2396
+ if len(class_name) == 0:
2397
+ assert common_name in ('animal','vehicle','blank')
2398
+ return
2399
+
2400
+ if len(species) > 0:
2401
+ assert all([len(s) > 0 for s in [genus,family,order]])
2402
+ binomial_name = genus + ' ' + species
2403
+ if binomial_name not in speciesnet_latin_name_to_taxon_string:
2404
+ speciesnet_latin_name_to_taxon_string[binomial_name] = s
2405
+ elif len(genus) > 0:
2406
+ assert all([len(s) > 0 for s in [family,order]])
2407
+ if genus not in speciesnet_latin_name_to_taxon_string:
2408
+ speciesnet_latin_name_to_taxon_string[genus] = s
2409
+ elif len(family) > 0:
2410
+ assert len(order) > 0
2411
+ if family not in speciesnet_latin_name_to_taxon_string:
2412
+ speciesnet_latin_name_to_taxon_string[family] = s
2413
+ elif len(order) > 0:
2414
+ if order not in speciesnet_latin_name_to_taxon_string:
2415
+ speciesnet_latin_name_to_taxon_string[order] = s
2416
+ else:
2417
+ if class_name not in speciesnet_latin_name_to_taxon_string:
2418
+ speciesnet_latin_name_to_taxon_string[class_name] = s
2419
+
2420
+ if len(common_name) > 0:
2421
+ if common_name not in speciesnet_common_name_to_taxon_string:
2422
+ speciesnet_common_name_to_taxon_string[common_name] = s
2423
+
2424
+ for s in speciesnet_taxonomy_list:
2425
+
2426
+ _insert_taxonomy_string(s)
2427
+
2428
+
2429
+ ##%% Make sure all parent taxa are represented in the taxonomy
+
+ # In theory any taxon that appears as the parent of another taxon should
+ # also be in the taxonomy, but this isn't always true, so we fix it here.
+
+ new_taxon_string_to_missing_tokens = defaultdict(list)
+
+ # latin_name = next(iter(speciesnet_latin_name_to_taxon_string.keys()))
+ for latin_name in speciesnet_latin_name_to_taxon_string.keys():
+
+     if 'no cv result' in latin_name:
+         continue
+
+     taxon_string = speciesnet_latin_name_to_taxon_string[latin_name]
+     tokens = taxon_string.split(';')
+
+     # Don't process GUID, species, or common name
+     # i_token = 6
+     for i_token in range(1,len(tokens)-2):
+
+         test_token = tokens[i_token]
+         if len(test_token) == 0:
+             continue
+
+         # Do we need to make up a taxon for this token?
+         if test_token not in speciesnet_latin_name_to_taxon_string:
+
+             new_tokens = [''] * 7
+             new_tokens[0] = 'fake_guid'
+             for i_copy_token in range(1,i_token+1):
+                 new_tokens[i_copy_token] = tokens[i_copy_token]
+             new_tokens[-1] = test_token + ' species'
+             assert new_tokens[-2] == ''
+             new_taxon_string = ';'.join(new_tokens)
+             # assert new_taxon_string not in new_taxon_strings
+             new_taxon_string_to_missing_tokens[new_taxon_string].append(test_token)
+
+     # ...for each token
+
+ # ...for each taxon
+
+ print('Found {} taxa that need to be inserted to make the taxonomy valid:\n'.format(
+     len(new_taxon_string_to_missing_tokens)))
+
+ new_taxon_string_to_missing_tokens = \
+     sort_dictionary_by_key(new_taxon_string_to_missing_tokens)
+ for taxon_string in new_taxon_string_to_missing_tokens:
+     missing_taxa = ','.join(new_taxon_string_to_missing_tokens[taxon_string])
+     print('{} ({})'.format(taxon_string,missing_taxa))
+
+ for new_taxon_string in new_taxon_string_to_missing_tokens:
+     _insert_taxonomy_string(new_taxon_string)
+
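+ # For illustration (hypothetical example): if 'sciuridae' appeared as the
+ # family of some row but had no row of its own, the loop above would
+ # fabricate:
+ #
+ #   'fake_guid;mammalia;rodentia;sciuridae;;;sciuridae species'
+
+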
+ ##%% Make sure all species on the allow-list are in the taxonomy
+
+ n_failed_mappings = 0
+
+ for target_taxon_latin_name in target_latin_to_common.keys():
+     if target_taxon_latin_name not in speciesnet_latin_name_to_taxon_string:
+         common_name = target_latin_to_common[target_taxon_latin_name]
+         s = '{} ({}) not in speciesnet taxonomy'.format(
+             target_taxon_latin_name,common_name)
+         if common_name in speciesnet_common_name_to_taxon_string:
+             s += ' (common name maps to {})'.format(
+                 speciesnet_common_name_to_taxon_string[common_name])
+         print(s)
+         n_failed_mappings += 1
+
+ if n_failed_mappings > 0:
+     raise ValueError('Cannot continue with geofence generation')
+
+
+ ##%% For the allow-list, map each parent taxon to a set of allowable child taxa
+
+ # Maps each parent name to the set of allowed child names; the set contains
+ # None when the parent is itself the lowest-level allowable taxon on a path
+ allowed_parent_taxon_to_child_taxa = defaultdict(set)
+
+ # latin_name = next(iter(target_latin_to_common.keys()))
+ for latin_name in target_latin_to_common:
+
+     taxon_string = speciesnet_latin_name_to_taxon_string[latin_name]
+     tokens = taxon_string.split(';')
+     assert len(tokens) == 7
+
+     # Remove GUID and common name
+     #
+     # This is now always class/order/family/genus/species
+     tokens = tokens[1:-1]
+
+     child_taxon = None
+
+     # If this is a species
+     if len(tokens[-1]) > 0:
+         binomial_name = tokens[-2] + ' ' + tokens[-1]
+         assert binomial_name == latin_name
+         allowed_parent_taxon_to_child_taxa[binomial_name].add(None)
+         child_taxon = binomial_name
+
+     # The first candidate parent is the genus
+     parent_token_index = len(tokens) - 2
 
- #%% instances.json generation test
+     while parent_token_index >= 0:
+         parent_taxon = tokens[parent_token_index]
+         allowed_parent_taxon_to_child_taxa[parent_taxon].add(child_taxon)
+         child_taxon = parent_taxon
+         parent_token_index -= 1
+
+ # ...for each allowed latin name
+
+ allowed_parent_taxon_to_child_taxa = \
+     sort_dictionary_by_key(allowed_parent_taxon_to_child_taxa)
+
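+ # For illustration (hypothetical allow-list containing only 'puma concolor'),
+ # the sorted map would look like:
+ #
+ #   {'carnivora': {'felidae'},
+ #    'felidae': {'puma'},
+ #    'mammalia': {'carnivora'},
+ #    'puma': {'puma concolor'},
+ #    'puma concolor': {None}}
+
+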
+ ##%% Map all predictions that exist in this dataset...
+
+ # ...to the prediction we should generate.
+
+ with open(input_file,'r') as f:
+     input_data = json.load(f)
+
+ input_category_id_to_common_name = input_data['classification_categories'] # noqa
+ input_category_id_to_taxonomy_string = \
+     input_data['classification_category_descriptions']
+
+ input_category_id_to_output_taxon_string = {}
+
+ # input_category_id = next(iter(input_category_id_to_taxonomy_string.keys()))
+ for input_category_id in input_category_id_to_taxonomy_string.keys():
+
+     input_taxon_string = input_category_id_to_taxonomy_string[input_category_id]
+     input_taxon_tokens = input_taxon_string.split(';')
+     assert len(input_taxon_tokens) == 7
+
+     # Don't mess with blank/no-cv-result/animal/human
+     if (input_taxon_string in non_taxonomic_prediction_strings) or \
+        (input_taxon_string == human_prediction_string):
+         input_category_id_to_output_taxon_string[input_category_id] = \
+             input_taxon_string
+         continue
+
+     # Remove GUID and common name
+     #
+     # This is now always class/order/family/genus/species
+     input_taxon_tokens = input_taxon_tokens[1:-1]
+
+     test_index = len(input_taxon_tokens) - 1
+     target_taxon = None
+
+     # Start at the species level, and see whether each taxon is allowed
+     while (test_index >= 0) and (target_taxon is None):
+
+         # Species are represented as binomial names
+         if (test_index == (len(input_taxon_tokens) - 1)) and \
+            (len(input_taxon_tokens[-1]) > 0):
+             test_taxon_name = \
+                 input_taxon_tokens[-2] + ' ' + input_taxon_tokens[-1]
+         else:
+             test_taxon_name = input_taxon_tokens[test_index]
+
+         # If we haven't yet found the level at which this taxon is non-empty,
+         # keep going up
+         if len(test_taxon_name) == 0:
+             test_index -= 1
+             continue
+
+         assert test_taxon_name in speciesnet_latin_name_to_taxon_string
+
+         # Is this taxon allowed according to the custom species list?
+         if test_taxon_name in allowed_parent_taxon_to_child_taxa:
+
+             allowed_child_taxa = allowed_parent_taxon_to_child_taxa[test_taxon_name]
+             assert allowed_child_taxa is not None
+
+             # If this is the lowest-level allowable taxon or there is not a
+             # unique child, don't walk any further, even if walking down
+             # is enabled.
+             if (None in allowed_child_taxa):
+                 assert len(allowed_child_taxa) == 1
+
+             if (None in allowed_child_taxa) or (len(allowed_child_taxa) > 1):
+                 target_taxon = test_taxon_name
+             elif not allow_walk_down:
+                 target_taxon = test_taxon_name
+             else:
+                 # If there's a unique child, walk back *down* the allowable
+                 # taxa until we run out of unique children
+                 while ((next(iter(allowed_child_taxa)) is not None) and \
+                        (len(allowed_child_taxa) == 1)):
+                     candidate_taxon = next(iter(allowed_child_taxa))
+                     assert candidate_taxon in allowed_parent_taxon_to_child_taxa
+                     assert candidate_taxon in speciesnet_latin_name_to_taxon_string
+                     allowed_child_taxa = \
+                         allowed_parent_taxon_to_child_taxa[candidate_taxon]
+                     target_taxon = candidate_taxon
+
+         # ...if this is an allowed taxon
+
+         test_index -= 1
+
+     # ...for each token
+
+     if target_taxon is None:
+         output_taxon_string = animal_prediction_string
+     else:
+         output_taxon_string = speciesnet_latin_name_to_taxon_string[target_taxon]
+     input_category_id_to_output_taxon_string[input_category_id] = output_taxon_string
+
+ # ...for each category
+
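+ # For illustration (hypothetical, continuing the 'puma concolor' example
+ # above): an input prediction of 'felidae' maps to 'felidae' itself when
+ # allow_walk_down is False, but walks down to 'puma concolor' (the unique
+ # allowed descendant) when allow_walk_down is True; a prediction with no
+ # allowed ancestor maps to the generic animal prediction.
+
+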
+ ##%% Build the new tables
 
- from megadetector.utils.wi_utils import generate_instances_json_from_folder # noqa
+ input_category_id_to_output_category_id = {}
+ output_taxon_string_to_category_id = {}
+ output_category_id_to_common_name = {}
+
+ for input_category_id in input_category_id_to_output_taxon_string:
+
+     original_common_name = \
+         input_category_id_to_common_name[input_category_id]
+     original_taxon_string = \
+         input_category_id_to_taxonomy_string[input_category_id]
+     output_taxon_string = \
+         input_category_id_to_output_taxon_string[input_category_id]
+
+     output_common_name = output_taxon_string.split(';')[-1]
+
+     # Do we need to create a new output category?
+     if output_taxon_string not in output_taxon_string_to_category_id:
+         output_category_id = str(len(output_taxon_string_to_category_id))
+         output_taxon_string_to_category_id[output_taxon_string] = \
+             output_category_id
+         output_category_id_to_common_name[output_category_id] = \
+             output_common_name
+     else:
+         output_category_id = \
+             output_taxon_string_to_category_id[output_taxon_string]
+
+     input_category_id_to_output_category_id[input_category_id] = \
+         output_category_id
+
+     if False:
+         print('Mapping {} ({}) to:\n{} ({})\n'.format(
+             original_common_name,original_taxon_string,
+             output_common_name,output_taxon_string))
+     if False:
+         print('Mapping {} to {}'.format(
+             original_common_name,output_common_name))
 
- instances_file = r'g:\temp\water-hole\instances.json'
+ # ...for each category
 
- _ = generate_instances_json_from_folder(folder=r'g:\temp\water-hole',
-                                         country='NAM',
-                                         lat=None,
-                                         lon=None,
-                                         output_file=instances_file,
-                                         filename_replacements={'g:/temp':'/mnt/g/temp'})
+
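+ # For illustration (hypothetical ids): if input categories '12' and '47'
+ # both map to the same output taxon string, they share a single output
+ # category id, i.e. they are merged into one category in the output tables.
+
+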
+ ##%% Remap all category labels
+
+ assert len(set(output_taxon_string_to_category_id.keys())) == \
+        len(set(output_taxon_string_to_category_id.values()))
+
+ output_category_id_to_taxon_string = \
+     invert_dictionary(output_taxon_string_to_category_id)
+
+ with open(input_file,'r') as f:
+     output_data = json.load(f)
+
+ for im in tqdm(output_data['images']):
+     if 'detections' in im and im['detections'] is not None:
+         for det in im['detections']:
+             if 'classifications' in det:
+                 for classification in det['classifications']:
+                     classification[0] = \
+                         input_category_id_to_output_category_id[classification[0]]
+
+ output_data['classification_categories'] = output_category_id_to_common_name
+ output_data['classification_category_descriptions'] = \
+     output_category_id_to_taxon_string
+
+
+ ##%% Write output
+
+ with open(output_file,'w') as f:
+     json.dump(output_data,f,indent=1)
+
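+ # For illustration (hypothetical values): a classification entry like
+ # ['47',0.93] becomes ['3',0.93] if input category '47' was remapped to
+ # output category '3'; confidence values are left untouched.
+
+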
+ #%% Interactive driver(s)
 
- # from megadetector.utils.path_utils import open_file; open_file(instances_file)
+ if False:
+
+     pass
 
+     #%% Shared cell to initialize geofencing and taxonomy information
+
+     from megadetector.utils.wi_utils import species_allowed_in_country # noqa
+     from megadetector.utils.wi_utils import initialize_geofencing, initialize_taxonomy_info # noqa
+     from megadetector.utils.wi_utils import _species_string_to_canonical_species_string # noqa
+     from megadetector.utils.wi_utils import generate_csv_rows_for_species # noqa
+     from megadetector.utils.wi_utils import _generate_csv_rows_to_block_all_countries_except # noqa
 
- #%% MD --> prediction conversion test
+     from megadetector.utils.wi_utils import taxonomy_string_to_geofencing_rules # noqa
+     from megadetector.utils.wi_utils import taxonomy_string_to_taxonomy_info # noqa
+     from megadetector.utils.wi_utils import common_name_to_taxonomy_info # noqa
+     from megadetector.utils.wi_utils import binomial_name_to_taxonomy_info # noqa
 
- from megadetector.utils.wi_utils import generate_predictions_json_from_md_results # noqa
- md_results_file = r'G:\temp\md-test-images\mdv5a.relpaths.json'
- predictions_json_file = r'\\wsl$\Ubuntu\home\dmorris\tmp\speciesnet-tests\mdv5a.abspaths.predictions-format.json'
- generate_predictions_json_from_md_results(md_results_file,predictions_json_file,base_folder=
-     '/home/dmorris/tmp/md-test-images/')
+     model_base = os.path.expanduser('~/models/speciesnet')
+     geofencing_file = os.path.join(model_base,'crop','geofence_release.2025.02.27.0702.json')
+     country_code_file = os.path.join(model_base,'country-codes.csv')
+     # encoding = 'cp1252'; taxonomy_file = r'g:\temp\taxonomy_mapping-' + encoding + '.json'
+     encoding = None; taxonomy_file = os.path.join(model_base,'taxonomy_mapping.json')
 
- from megadetector.utils.wi_utils import generate_predictions_json_from_md_results # noqa
- md_results_file = r"G:\temp\water-hole\md_results.json"
- predictions_json_file = r"G:\temp\water-hole\md_results-prediction_format.json"
- generate_predictions_json_from_md_results(md_results_file,predictions_json_file,base_folder=
-     '/mnt/g/temp/water-hole')
+     initialize_geofencing(geofencing_file, country_code_file, force_init=True)
+     initialize_taxonomy_info(taxonomy_file, force_init=True, encoding=encoding)
+
 
+     #%% Test driver for geofence_fixes.csv function
 
- #%% Geofencing tests
+     block_except_list = 'AUS, PNG, THA, IDN, MYS'
+     species = 'dingo'
+     species_string = _species_string_to_canonical_species_string(species)
+     rows = _generate_csv_rows_to_block_all_countries_except(species_string,block_except_list)
 
- geofencing_file = r'g:\temp\geofence_mapping.json'
- country_code_file = r'G:/temp/country-codes.csv'
- encoding = 'cp1252'; taxonomy_file = r'g:\temp\taxonomy_mapping-' + encoding + '.json'
+     # import clipboard; clipboard.copy('\n'.join(rows))
+     print(rows)
 
- initialize_taxonomy_info(taxonomy_file, force_init=True, encoding=encoding)
- initialize_geofencing(geofencing_file, country_code_file, force_init=True)
 
- species = 'didelphis marsupialis'
- print(binomial_name_to_taxonomy_info[species])
- country = 'Guatemala'
- assert species_allowed_in_country(species, country)
+     #%%
 
- species = 'virginia opossum'
- print(common_name_to_taxonomy_info[species])
- country = 'USA'
- assert species_allowed_in_country(species, country)
+     taxon_name = 'hippopotamus amphibius'
+     taxonomy_info = binomial_name_to_taxonomy_info[taxon_name]
+     taxonomy_string_short = taxonomy_info_to_taxonomy_string(taxonomy_info)
+     assert len(taxonomy_string_short.split(';')) == 5
 
+     generate_csv_rows_for_species(species_string=taxonomy_string_short,
+                                   allow_countries=['COL'],
+                                   block_countries=None,
+                                   allow_states=None,
+                                   block_states=None,
+                                   blockexcept_countries=None)
+
+     # _generate_csv_rows_to_block_all_countries_except(species_string,'AUS')
 
- #%% Test several species
 
- geofencing_file = r'g:\temp\geofence_mapping.json'
- country_code_file = r'G:/temp/country-codes.csv'
- encoding = 'cp1252'; taxonomy_file = r'g:\temp\taxonomy_mapping-' + encoding + '.json'
+     #%% Test the effects of geofence changes
+
+     species = 'canis lupus dingo'
+     country = 'guatemala'
+     species_allowed_in_country(species,country,state=None,return_status=False)
+
 
- initialize_taxonomy_info(taxonomy_file, force_init=True, encoding=encoding)
- initialize_geofencing(geofencing_file, country_code_file, force_init=True)
+     #%% Geofencing lookups
+
+     # This can be a latin or common name
+     species = 'hippopotamidae'
+     # print(common_name_to_taxonomy_info[species])
+
+     # This can be a name or country code
+     country = 'USA'
+     print(species_allowed_in_country(species, country))
+
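+     # For illustration: consistent with the older driver code removed in
+     # this diff (which asserted directly on the return value),
+     # species_allowed_in_country appears to return a boolean by default, e.g.:
+     #
+     #   species_allowed_in_country('virginia opossum', 'USA')  # -> True
+
+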
+     #%% Bulk geofence lookups
 
      if True:
 
@@ -1709,86 +2866,3 @@ if False:
          if state is not None:
              state_string = ' ({})'.format(state)
          print('{} ({}) for {}{}: {}'.format(taxonomy_info['common_name'],species,country,state_string,allowed))
-
-
- #%% Test conversion from predictons.json to MD format
-
- import os # noqa
- from megadetector.utils.wi_utils import generate_md_results_from_predictions_json # noqa
-
- # detector_source = 'speciesnet'
- detector_source = 'md'
-
- if False:
-     image_folder = r'g:\temp\md-test-images'
-     base_folder = '/home/dmorris/tmp/md-test-images/'
-     if detector_source == 'speciesnet':
-         predictions_json_file = r"\\wsl$\Ubuntu\home\dmorris\tmp\speciesnet-tests\ensemble-output.json"
-         md_results_file = r"\\wsl$\Ubuntu\home\dmorris\tmp\speciesnet-tests\ensemble-output-md-format.json"
-     else:
-         assert detector_source == 'md'
-         predictions_json_file = r"\\wsl$\Ubuntu\home\dmorris\tmp\speciesnet-tests\ensemble-output-from-md-results.json"
-         md_results_file = r"\\wsl$\Ubuntu\home\dmorris\tmp\speciesnet-tests\ensemble-output-md-format-from-md-results.json"
- else:
-     image_folder = r'g:\temp\water-hole'
-     base_folder = '/mnt/g/temp/water-hole/'
-     if detector_source == 'speciesnet':
-         predictions_json_file = r'g:\temp\water-hole\ensemble-output.json'
-         md_results_file = r'g:\temp\water-hole\ensemble-output.md_format.json'
-     else:
-         assert detector_source == 'md'
-         predictions_json_file = r'g:\temp\water-hole\ensemble-output-md.json'
-         md_results_file = r'g:\temp\water-hole\ensemble-output-md.md_format.json'
-
- generate_md_results_from_predictions_json(predictions_json_file=predictions_json_file,
-                                           md_results_file=md_results_file,
-                                           base_folder=base_folder)
-
- # from megadetector.utils.path_utils import open_file; open_file(md_results_file)
-
- assert os.path.isdir(image_folder)
-
-
- #%% Preview
-
- from megadetector.postprocessing.postprocess_batch_results import \
-     PostProcessingOptions, process_batch_results
- from megadetector.utils import path_utils
-
- render_animals_only = False
-
- options = PostProcessingOptions()
- options.image_base_dir = image_folder
- options.include_almost_detections = True
- options.num_images_to_sample = None
- options.confidence_threshold = 0.2
- options.almost_detection_confidence_threshold = options.confidence_threshold - 0.05
- options.ground_truth_json_file = None
- options.separate_detections_by_category = True
- options.sample_seed = 0
- options.max_figures_per_html_file = 5000
-
- options.parallelize_rendering = True
- options.parallelize_rendering_n_cores = 10
- options.parallelize_rendering_with_threads = True
- options.sort_classification_results_by_count = True
-
- if render_animals_only:
-     # Omit some pages from the output, useful when animals are rare
-     options.rendering_bypass_sets = ['detections_person','detections_vehicle',
-                                      'detections_person_vehicle','non_detections']
-
- output_base = r'g:\temp\preview' + '_' + detector_source
- if render_animals_only:
-     output_base = output_base + '_render_animals_only'
- os.makedirs(output_base, exist_ok=True)
-
- print('Writing preview to {}'.format(output_base))
-
- options.md_results_file = md_results_file
- options.output_dir = output_base
- ppresults = process_batch_results(options)
- html_output_file = ppresults.output_html_file
-
- path_utils.open_file(html_output_file,attempt_to_open_in_wsl_host=True,browser_name='chrome')
- # import clipboard; clipboard.copy(html_output_file)