megadetector 5.0.25__py3-none-any.whl → 5.0.27__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of megadetector might be problematic.

Files changed (34)
  1. megadetector/data_management/cct_json_utils.py +15 -2
  2. megadetector/data_management/coco_to_yolo.py +53 -31
  3. megadetector/data_management/databases/combine_coco_camera_traps_files.py +7 -3
  4. megadetector/data_management/databases/integrity_check_json_db.py +2 -2
  5. megadetector/data_management/lila/generate_lila_per_image_labels.py +2 -2
  6. megadetector/data_management/lila/test_lila_metadata_urls.py +21 -10
  7. megadetector/data_management/remap_coco_categories.py +60 -11
  8. megadetector/data_management/yolo_to_coco.py +45 -15
  9. megadetector/postprocessing/classification_postprocessing.py +788 -524
  10. megadetector/postprocessing/create_crop_folder.py +95 -33
  11. megadetector/postprocessing/load_api_results.py +4 -1
  12. megadetector/postprocessing/md_to_coco.py +1 -1
  13. megadetector/postprocessing/postprocess_batch_results.py +156 -42
  14. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +3 -8
  15. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +2 -2
  16. megadetector/postprocessing/separate_detections_into_folders.py +20 -4
  17. megadetector/postprocessing/subset_json_detector_output.py +180 -15
  18. megadetector/postprocessing/validate_batch_results.py +13 -5
  19. megadetector/taxonomy_mapping/map_new_lila_datasets.py +6 -6
  20. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +3 -58
  21. megadetector/taxonomy_mapping/species_lookup.py +45 -2
  22. megadetector/utils/ct_utils.py +4 -2
  23. megadetector/utils/directory_listing.py +1 -1
  24. megadetector/utils/md_tests.py +2 -1
  25. megadetector/utils/path_utils.py +308 -19
  26. megadetector/utils/wi_utils.py +363 -186
  27. megadetector/visualization/visualization_utils.py +2 -1
  28. megadetector/visualization/visualize_db.py +1 -1
  29. megadetector/visualization/visualize_detector_output.py +1 -4
  30. {megadetector-5.0.25.dist-info → megadetector-5.0.27.dist-info}/METADATA +4 -3
  31. {megadetector-5.0.25.dist-info → megadetector-5.0.27.dist-info}/RECORD +34 -34
  32. {megadetector-5.0.25.dist-info → megadetector-5.0.27.dist-info}/WHEEL +1 -1
  33. {megadetector-5.0.25.dist-info → megadetector-5.0.27.dist-info/licenses}/LICENSE +0 -0
  34. {megadetector-5.0.25.dist-info → megadetector-5.0.27.dist-info}/top_level.txt +0 -0
@@ -84,6 +84,105 @@ def is_valid_taxonomy_string(s):
     return isinstance(s,str) and (len(s.split(';')) == 5) and (s == s.lower())
 
 
+def clean_taxonomy_string(s):
+    """
+    If [s] is a seven-token prediction string, trim the GUID and common name to produce
+    a "clean" taxonomy string. Else if [s] is a five-token string, return it. Else error.
+
+    Args:
+        s (str): the seven- or five-token taxonomy/prediction string to clean
+
+    Returns:
+        str: the five-token taxonomy string
+    """
+
+    if is_valid_taxonomy_string(s):
+        return s
+    elif is_valid_prediction_string(s):
+        tokens = s.split(';')
+        assert len(tokens) == 7
+        return ';'.join(tokens[1:-1])
+    else:
+        raise ValueError('Invalid taxonomy string')
+
+
+taxonomy_level_names = \
+    ['non-taxonomic','kingdom','phylum','class','order','family','genus','species','subspecies']
+
+
+def taxonomy_level_to_string(k):
+    """
+    Maps taxonomy level indices (0 for non-taxonomic, 1 for kingdom, 2 for phylum, etc.)
+    to strings.
+
+    Args:
+        k (int): taxonomy level index
+
+    Returns:
+        str: taxonomy level string
+    """
+
+    assert k >= 0 and k < len(taxonomy_level_names), \
+        'Illegal taxonomy level index {}'.format(k)
+
+    return taxonomy_level_names[k]
+
+
+def taxonomy_level_string_to_index(s):
+    """
+    Maps strings ('kingdom', 'species', etc.) to level indices.
+
+    Args:
+        s (str): taxonomy level string
+
+    Returns:
+        int: taxonomy level index
+    """
+
+    assert s in taxonomy_level_names, 'Unrecognized taxonomy level string {}'.format(s)
+    return taxonomy_level_names.index(s)
+
+
+def taxonomy_level_index(s):
+    """
+    Returns the taxonomy level up to which [s] is defined (0 for non-taxonomic, 1 for kingdom,
+    2 for phylum, etc.). Empty strings and non-taxonomic strings are treated as level 0. 1 and 2
+    will never be returned; "animal" doesn't look like other taxonomic strings, so here we treat
+    it as non-taxonomic.
+
+    Args:
+        s (str): 5-token or 7-token taxonomy string
+
+    Returns:
+        int: taxonomy level
+    """
+
+    if s in non_taxonomic_prediction_strings or s in non_taxonomic_prediction_short_strings:
+        return 0
+
+    tokens = s.split(';')
+    assert len(tokens) in (5,7)
+
+    if len(tokens) == 7:
+        tokens = tokens[1:-1]
+
+    if len(tokens[0]) == 0:
+        return 0
+    # WI taxonomy strings start at class, so we'll never return 1 (kingdom) or 2 (phylum)
+    elif len(tokens[1]) == 0:
+        return 3
+    elif len(tokens[2]) == 0:
+        return 4
+    elif len(tokens[3]) == 0:
+        return 5
+    elif len(tokens[4]) == 0:
+        return 6
+    # Subspecies are delimited with a space
+    elif ' ' not in tokens[4]:
+        return 7
+    else:
+        return 8
+
+
 def wi_result_to_prediction_string(r):
     """
     Convert the dict [r] - typically loaded from a row in a downloaded .csv file - to
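
A quick usage sketch for the new taxonomy helpers; the seven-token string below is hypothetical, but follows the GUID;class;order;family;genus;species;common-name layout this hunk assumes:

from megadetector.utils.wi_utils import (clean_taxonomy_string,
    taxonomy_level_index, taxonomy_level_to_string)

# Hypothetical seven-token prediction string (GUID and common name invented)
s = 'abc-123;mammalia;cetartiodactyla;cervidae;odocoileus;virginianus;white-tailed deer'

clean = clean_taxonomy_string(s)
# 'mammalia;cetartiodactyla;cervidae;odocoileus;virginianus'
level = taxonomy_level_index(clean)     # 7, i.e. defined down to species
print(taxonomy_level_to_string(level))  # 'species'
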
@@ -500,6 +599,10 @@ non_taxonomic_prediction_strings = [blank_prediction_string,
                                     animal_prediction_string,
                                     vehicle_prediction_string]
 
+non_taxonomic_prediction_short_strings = [';'.join(s.split(';')[1:-1]) for s in \
+                                          non_taxonomic_prediction_strings]
+
+
 process_cv_response_url = 'https://placeholder'
 
 
@@ -912,6 +1015,19 @@ def is_human_classification(prediction_string):
         bool: whether this string corresponds to a human category
     """
     return prediction_string == human_prediction_string or 'homo;sapiens' in prediction_string
+
+
+def is_vehicle_classification(prediction_string):
+    """
+    Determines whether the input string represents a vehicle classification.
+
+    Args:
+        prediction_string (str): a string in the semicolon-delimited prediction string format
+
+    Returns:
+        bool: whether this string corresponds to the vehicle category
+    """
+    return prediction_string == vehicle_prediction_string
 
 
 def is_animal_classification(prediction_string):
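
Together with the existing is_animal_classification and is_human_classification, this completes a three-way routing of top classifications; a minimal sketch of the pattern (the helper name is invented, and the next hunk's new function simply falls through to 'vehicle' rather than checking explicitly):

def category_name_for(prediction_string):
    # Route a top classification to a coarse category name
    if is_animal_classification(prediction_string):
        return 'animal'
    elif is_human_classification(prediction_string):
        return 'human'
    elif is_vehicle_classification(prediction_string):
        return 'vehicle'
    else:
        return 'unknown'
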
@@ -939,17 +1055,114 @@ def is_animal_classification(prediction_string):
     return True
 
 
+def generate_whole_image_detections_for_classifications(classifications_json_file,
+                                                        detections_json_file,
+                                                        ensemble_json_file=None,
+                                                        ignore_blank_classifications=True):
+    """
+    Given a set of classification results that were likely run on already-cropped
+    images, generate a file of [fake] detections in which each image is covered
+    by a single whole-image detection.
+
+    Args:
+        classifications_json_file (str): SpeciesNet-formatted file containing classifications
+        detections_json_file (str): SpeciesNet-formatted file to write with detections
+        ensemble_json_file (str, optional): SpeciesNet-formatted file to write with detections
+            and classifications
+        ignore_blank_classifications (bool, optional): use non-top classifications when
+            the top classification is "blank" or "no CV result"
+
+    Returns:
+        dict: the contents of [detections_json_file]
+    """
+
+    with open(classifications_json_file,'r') as f:
+        classification_results = json.load(f)
+    predictions = classification_results['predictions']
+
+    output_predictions = []
+    ensemble_predictions = []
+
+    # prediction = predictions[0]
+    for prediction in predictions:
+
+        output_prediction = {}
+        output_prediction['filepath'] = prediction['filepath']
+        i_score = 0
+        if ignore_blank_classifications:
+            while (prediction['classifications']['classes'][i_score] in \
+                   (blank_prediction_string,no_cv_result_prediction_string)):
+                i_score += 1
+        top_classification = prediction['classifications']['classes'][i_score]
+        top_classification_score = prediction['classifications']['scores'][i_score]
+        if is_animal_classification(top_classification):
+            category_name = 'animal'
+        elif is_human_classification(top_classification):
+            category_name = 'human'
+        else:
+            category_name = 'vehicle'
+
+        if category_name == 'human':
+            md_category_name = 'person'
+        else:
+            md_category_name = category_name
+
+        output_detection = {}
+        output_detection['label'] = category_name
+        output_detection['category'] = md_category_name_to_id[md_category_name]
+        output_detection['conf'] = 1.0
+        output_detection['bbox'] = [0.0, 0.0, 1.0, 1.0]
+        output_prediction['detections'] = [output_detection]
+        output_predictions.append(output_prediction)
+
+        ensemble_prediction = {}
+        ensemble_prediction['filepath'] = prediction['filepath']
+        ensemble_prediction['detections'] = [output_detection]
+        ensemble_prediction['prediction'] = top_classification
+        ensemble_prediction['prediction_score'] = top_classification_score
+        ensemble_prediction['prediction_source'] = 'fake_ensemble_file_utility'
+        ensemble_prediction['classifications'] = prediction['classifications']
+        ensemble_predictions.append(ensemble_prediction)
+
+    # ...for each image
+
+    ## Write output
+
+    if ensemble_json_file is not None:
+
+        ensemble_output_data = {'predictions':ensemble_predictions}
+        with open(ensemble_json_file,'w') as f:
+            json.dump(ensemble_output_data,f,indent=1)
+        _ = validate_predictions_file(ensemble_json_file)
+
+    output_data = {'predictions':output_predictions}
+    with open(detections_json_file,'w') as f:
+        json.dump(output_data,f,indent=1)
+    return validate_predictions_file(detections_json_file)
+
+# ...def generate_whole_image_detections_for_classifications(...)
+
+
 def generate_md_results_from_predictions_json(predictions_json_file,
                                               md_results_file,
                                               base_folder=None,
-                                              max_decimals=5):
+                                              max_decimals=5,
+                                              convert_human_to_person=True):
     """
-    Generate an MD-formatted .json file from a predictions.json file. Typically,
-    MD results files use relative paths, and predictions.json files use absolute paths, so
-    this function optionally removes the leading string [base_folder] from all file names.
+    Generate an MD-formatted .json file from a predictions.json file, generated by the
+    SpeciesNet ensemble. Typically, MD results files use relative paths, and predictions.json
+    files use absolute paths, so this function optionally removes the leading string
+    [base_folder] from all file names.
+
+    Currently just applies the top classification category to every detection. If the top
+    classification is "blank", writes an empty detection list.
+
+    Uses the classification from the "prediction" field if it's available, otherwise
+    uses the "classifications" field.
 
-    Currently just applies the top classification category to every detection. If the top classification
-    is "blank", writes an empty detection list.
+    When using the "prediction" field, records the top class in the "classifications" field to
+    a field in each image called "top_classification_common_name". This is often different
+    from the value of the "prediction" field.
 
     speciesnet_to_md.py is a command-line driver for this function.
 
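
A usage sketch for the new function (file names here are hypothetical); it writes one whole-image pseudo-detection per image, and optionally a merged ensemble-style file that carries the original classifications along:

from megadetector.utils.wi_utils import \
    generate_whole_image_detections_for_classifications

detections = generate_whole_image_detections_for_classifications(
    classifications_json_file='classifications.json',
    detections_json_file='whole_image_detections.json',
    ensemble_json_file='fake_ensemble.json',
    ignore_blank_classifications=True)
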
@@ -960,6 +1173,9 @@ def generate_md_results_from_predictions_json(predictions_json_file,
             predictions.json file
         max_decimals (int, optional): number of decimal places to which we should round
             all values
+        convert_human_to_person (bool, optional): WI predictions.json files sometimes use the
+            detection category "human"; MD files usually use "person". If True, switches "human"
+            to "person".
     """
 
     # Read predictions file
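
For example (paths hypothetical), the new parameter is used like this; with the default convert_human_to_person=True, any "human" detection category is renamed to "person" in the output:

from megadetector.utils.wi_utils import generate_md_results_from_predictions_json

generate_md_results_from_predictions_json(
    predictions_json_file='ensemble-output.json',
    md_results_file='ensemble-output.md_format.json',
    base_folder='/datadrive/images/',
    convert_human_to_person=True)
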
@@ -1040,7 +1256,8 @@ def generate_md_results_from_predictions_json(predictions_json_file,
         # ...if detections are present
 
         class_to_assign = None
-        class_confidence = None
+        class_confidence = None
+        top_classification_common_name = None
 
         if 'classifications' in im_in:
 
@@ -1050,8 +1267,15 @@ def generate_md_results_from_predictions_json(predictions_json_file,
             class_to_assign = classifications['classes'][0]
             class_confidence = classifications['scores'][0]
 
+            tokens = class_to_assign.split(';')
+            assert len(tokens) == 7
+            top_classification_common_name = tokens[-1]
+            if len(top_classification_common_name) == 0:
+                top_classification_common_name = 'undefined'
+
         if 'prediction' in im_in:
 
+            im_out['top_classification_common_name'] = top_classification_common_name
             class_to_assign = im_in['prediction']
             class_confidence = im_in['prediction_score']
 
@@ -1111,8 +1335,7 @@ def generate_md_results_from_predictions_json(predictions_json_file,
         print('Warning: you supplied {} as the base folder, but I made zero replacements'.format(
             base_folder))
 
-    # Fix the 'unknown' category
-
+    # Fix the 'unknown' category
     if len(all_unknown_detections) > 0:
 
         max_detection_category_id = max([int(x) for x in detection_category_id_to_name.keys()])
@@ -1144,6 +1367,11 @@ def generate_md_results_from_predictions_json(predictions_json_file,
     info['format_version'] = 1.4
     info['detector'] = 'converted_from_predictions_json'
 
+    if convert_human_to_person:
+        for k in detection_categories_out.keys():
+            if detection_categories_out[k] == 'human':
+                detection_categories_out[k] = 'person'
+
     output_dict = {}
     output_dict['info'] = info
     output_dict['detection_categories'] = detection_categories_out
@@ -1223,6 +1451,7 @@ def generate_predictions_json_from_md_results(md_results_file,
 
 # ...def generate_predictions_json_from_md_results(...)
 
+
 default_tokens_to_ignore = ['$RECYCLE.BIN']
 
 def generate_instances_json_from_folder(folder,
@@ -1403,7 +1632,7 @@ def validate_predictions_file(fn,instances=None,verbose=True):
             failures.append(im)
 
     if verbose:
-        print('Read detector results for {} images, with {} failure(s)'.format(
+        print('Read predictions for {} images, with {} failure(s)'.format(
             len(d['predictions']),len(failures)))
 
     if instances is not None:
@@ -1454,6 +1683,7 @@ def find_geofence_adjustments(ensemble_json_file,use_latin_names=False):
         descending order by count.
     """
 
+    # Load and validate ensemble results
     ensemble_results = validate_predictions_file(ensemble_json_file)
 
     assert isinstance(ensemble_results,dict)
@@ -1510,14 +1740,56 @@ def find_geofence_adjustments(ensemble_json_file,use_latin_names=False):
 # ...def find_geofence_adjustments(...)
 
 
+def generate_geofence_adjustment_html_summary(rollup_pair_to_count,min_count=10):
+    """
+    Given a set of geofence rollups, likely generated by find_geofence_adjustments,
+    generate an HTML summary of the changes made by geofencing. The resulting HTML
+    is wrapped in <div>, but not, for example, in <html> or <body>.
+
+    Args:
+        rollup_pair_to_count (dict): changes made by geofencing, mapping each
+            original/rolled-up pair to a count; see find_geofence_adjustments for details
+        min_count (int, optional): minimum number of changes a pair needs in order
+            to be included in the report.
+    """
+
+    geofence_footer = ''
+
+    # Restrict to the list of taxa that were impacted by geofencing
+    rollup_pair_to_count = \
+        {key: value for key, value in rollup_pair_to_count.items() if value >= min_count}
+
+    # rollup_pair_to_count is sorted in descending order by count
+    assert is_list_sorted(list(rollup_pair_to_count.values()),reverse=True)
+
+    if len(rollup_pair_to_count) > 0:
+
+        geofence_footer = \
+            '<h3>Geofence changes that occurred more than {} times</h3>\n'.format(min_count)
+        geofence_footer += '<div class="contentdiv">\n'
+
+        print('\nRollup changes with count > {}:'.format(min_count))
+        for rollup_pair in rollup_pair_to_count.keys():
+            count = rollup_pair_to_count[rollup_pair]
+            rollup_pair_s = rollup_pair.replace(',',' --> ')
+            print('{}: {}'.format(rollup_pair_s,count))
+            rollup_pair_html = rollup_pair.replace(',',' &rarr; ')
+            geofence_footer += '{} ({})<br/>\n'.format(rollup_pair_html,count)
+
+        geofence_footer += '</div>\n'
+
+    return geofence_footer
+
+# ...def generate_geofence_adjustment_html_summary(...)
+
+
 #%% Module-level globals related to taxonomy mapping and geofencing
 
 # This maps a taxonomy string (e.g. mammalia;cetartiodactyla;cervidae;odocoileus;virginianus) to
 # a dict with keys taxon_id, common_name, kingdom, phylum, class, order, family, genus, species
 taxonomy_string_to_taxonomy_info = None
 
-# Maps a binomial name (possibly three tokens, if it's a subspecies) to the same dict
-# described above.
+# Maps a binomial name (one, two, or three ws-delimited tokens) to the same dict described above.
 binomial_name_to_taxonomy_info = None
 
 # Maps a common name to the same dict described above
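
The new summary function slots in after find_geofence_adjustments; a sketch (the ensemble file name is hypothetical):

from megadetector.utils.wi_utils import (find_geofence_adjustments,
    generate_geofence_adjustment_html_summary)

# Count how often geofencing rolled predictions up, then render the pairs
# that occurred at least 10 times as an HTML <div>
rollup_pair_to_count = find_geofence_adjustments('ensemble-output.json')
html_div = generate_geofence_adjustment_html_summary(rollup_pair_to_count,
                                                     min_count=10)
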
@@ -1627,17 +1899,28 @@ def initialize_taxonomy_info(taxonomy_file,force_init=False,encoding='cp1252'):
             common_name_to_taxonomy_info[taxon_info['common_name']] = taxon_info
 
         taxonomy_string_to_taxonomy_info[taxonomy_string] = taxon_info
-        if tokens[4] == '' or tokens[5] == '':
+
+        binomial_name = None
+        if len(tokens[4]) > 0 and len(tokens[5]) > 0:
+            # strip(), but don't remove spaces from the species name;
+            # subspecies are separated with a space, e.g. canis;lupus dingo
+            binomial_name = tokens[4].strip() + ' ' + tokens[5].strip()
+        elif len(tokens[4]) > 0:
+            binomial_name = tokens[4].strip()
+        elif len(tokens[3]) > 0:
+            binomial_name = tokens[3].strip()
+        elif len(tokens[2]) > 0:
+            binomial_name = tokens[2].strip()
+        elif len(tokens[1]) > 0:
+            binomial_name = tokens[1].strip()
+        if binomial_name is None:
             # print('Warning: no binomial name for {}'.format(taxonomy_string))
             pass
         else:
-            # strip(), but don't remove spaces from the species name;
-            # subspecies are separated with a space, e.g. canis;lupus dingo
-            binomial_name = tokens[4].strip() + ' ' + tokens[5].strip()
             binomial_name_to_taxonomy_info[binomial_name] = taxon_info
 
-    print('Created {} records in taxonomy_string_to_taxonomy_info'.format(
-        len(taxonomy_string_to_taxonomy_info)))
+    print('Created {} records in taxonomy_string_to_taxonomy_info'.format(len(taxonomy_string_to_taxonomy_info)))
+    print('Created {} records in common_name_to_taxonomy_info'.format(len(common_name_to_taxonomy_info)))
 
 # ...def initialize_taxonomy_info(...)
 
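
With this change, binomial_name_to_taxonomy_info gains keys for genus-, family-, and class-level records, not just two- and three-token names; a sketch (the taxonomy file path and lookup keys are hypothetical):

from megadetector.utils import wi_utils

wi_utils.initialize_taxonomy_info('taxonomy_mapping.json',
                                  force_init=True, encoding=None)

# Two-token binomial names resolve as before...
info = wi_utils.binomial_name_to_taxonomy_info['odocoileus virginianus']

# ...and single-token names (e.g. a family) now resolve too
info = wi_utils.binomial_name_to_taxonomy_info['hippopotamidae']
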
@@ -1741,7 +2024,7 @@ def generate_csv_rows_for_species(species_string,
     and blocking a country.
 
     Args:
-        species_string (str): string in semicolon-delimited WI taxonomy format
+        species_string (str): five-token string in semicolon-delimited WI taxonomy format
         allow_countries (optional, list or str): three-letter country code, list of
             country codes, or comma-separated list of country codes to allow
         block_countries (optional, list or str): three-letter country code, list of
@@ -1849,23 +2132,21 @@ def initialize_geofencing(geofencing_file,country_code_file,force_init=False):
 
         species_rules = taxonomy_string_to_geofencing_rules[species_string]
 
-        # Every country should *either* have allow rules or block rules, no countries
-        # currently have both
-        assert len(species_rules.keys()) == 1
-        rule_type = list(species_rules.keys())[0]
-        assert rule_type in ('allow','block')
-
-        all_country_rules_this_species = species_rules[rule_type]
-        for country_code in all_country_rules_this_species.keys():
-
-            assert country_code in country_code_to_country
+        if len(species_rules.keys()) > 1:
+            print('Warning: taxon {} has both allow and block rules'.format(species_string))
 
-            region_rules = all_country_rules_this_species[country_code]
+        for rule_type in species_rules.keys():
+
+            assert rule_type in ('allow','block')
+            all_country_rules_this_species = species_rules[rule_type]
 
-            # Right now we only have regional rules for the USA; these may be part of
-            # allow or block rules.
-            if len(region_rules) > 0:
-                assert country_code == 'USA'
+            for country_code in all_country_rules_this_species.keys():
+                assert country_code in country_code_to_country
+                region_rules = all_country_rules_this_species[country_code]
+                # Right now we only have regional rules for the USA; these may be part of
+                # allow or block rules.
+                if len(region_rules) > 0:
+                    assert country_code == 'USA'
 
     # ...for each species
 
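
The relaxed validation implies the shape of the rules table; a hypothetical entry for reference (countries and regions invented):

# taxonomy_string_to_geofencing_rules maps a five-token taxonomy string to a
# dict of rule types ('allow' and/or 'block'); each rule type maps three-letter
# country codes to lists of regional rules, which are currently only used for
# the USA. A taxon may now carry both rule types, with a printed warning.
taxonomy_string_to_geofencing_rules = {
    'mammalia;cetartiodactyla;cervidae;odocoileus;virginianus': {
        'allow': {'USA': ['CA', 'OR'], 'CAN': []},
        'block': {'AUS': []},
    }
}
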
@@ -1875,7 +2156,7 @@ def initialize_geofencing(geofencing_file,country_code_file,force_init=False):
 def _species_string_to_canonical_species_string(species):
     """
     Convert a string that may be a 5-token species string, a binomial name,
-    or a common name into a 5-token species string.
+    or a common name into a 5-token species string, using taxonomic lookup.
     """
 
     global taxonomy_string_to_taxonomy_info
@@ -1894,14 +2175,14 @@ def _species_string_to_canonical_species_string(species):
     # If this is already a taxonomy string...
     if len(species.split(';')) == 5:
         pass
-    # If this is a binomial name (which may include a subspecies)...
-    elif (len(species.split(' ')) in (2,3)) and (species in binomial_name_to_taxonomy_info):
-        taxonomy_info = binomial_name_to_taxonomy_info[species]
-        taxonomy_string = taxonomy_info_to_taxonomy_string(taxonomy_info)
     # If this is a common name...
     elif species in common_name_to_taxonomy_info:
         taxonomy_info = common_name_to_taxonomy_info[species]
         taxonomy_string = taxonomy_info_to_taxonomy_string(taxonomy_info)
+    # If this is a binomial name...
+    elif (species in binomial_name_to_taxonomy_info):
+        taxonomy_info = binomial_name_to_taxonomy_info[species]
+        taxonomy_string = taxonomy_info_to_taxonomy_string(taxonomy_info)
     else:
         raise ValueError('Could not find taxonomic information for {}'.format(species))
 
@@ -1966,29 +2247,34 @@ def species_allowed_in_country(species,country,state=None,return_status=False):
     allowed_countries = []
     blocked_countries = []
 
-    assert len(geofencing_rules_this_species.keys()) == 1
-    rule_type = list(geofencing_rules_this_species.keys())[0]
-    assert rule_type in ('allow','block')
+    rule_types_this_species = list(geofencing_rules_this_species.keys())
+    for rule_type in rule_types_this_species:
+        assert rule_type in ('allow','block')
 
-    if rule_type == 'allow':
-        allowed_countries = list(geofencing_rules_this_species['allow'])
-    else:
-        assert rule_type == 'block'
+    if 'block' in rule_types_this_species:
         blocked_countries = list(geofencing_rules_this_species['block'])
+    if 'allow' in rule_types_this_species:
+        allowed_countries = list(geofencing_rules_this_species['allow'])
 
     status = None
 
     # The convention is that block rules win over allow rules
     if country_code in blocked_countries:
-        status = 'blocked'
+        if country_code in allowed_countries:
+            status = 'blocked_over_allow'
+        else:
+            status = 'blocked'
     elif country_code in allowed_countries:
         status = 'allowed'
-    else:
+    elif len(allowed_countries) > 0:
         # The convention is that if allow rules exist, any country not on that list
         # is blocked.
-        assert len(allowed_countries) > 0
-        return 'not_on_country_allow_list'
-
+        status = 'block_not_on_country_allow_list'
+    else:
+        # Only block rules exist for this species, and they don't include this country
+        assert len(blocked_countries) > 0
+        status = 'allow_not_on_block_list'
+
     # Now let's see whether we have to deal with any regional rules
     if state is None:
 
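
The reworked logic produces a richer status vocabulary: 'blocked', 'blocked_over_allow', 'allowed', 'block_not_on_country_allow_list', and 'allow_not_on_block_list'. A sketch of inspecting it, assuming return_status=True surfaces that string (the species and country are just examples):

from megadetector.utils.wi_utils import species_allowed_in_country

status = species_allowed_in_country('virginia opossum', 'USA',
                                    return_status=True)
print(status)
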
@@ -2441,16 +2727,18 @@ if False:
 
     from megadetector.utils.wi_utils import taxonomy_string_to_geofencing_rules # noqa
     from megadetector.utils.wi_utils import taxonomy_string_to_taxonomy_info # noqa
+    from megadetector.utils.wi_utils import common_name_to_taxonomy_info # noqa
+    from megadetector.utils.wi_utils import binomial_name_to_taxonomy_info # noqa
 
-    geofencing_file = r'c:\git\cameratrapai\data\geofence_base.json'
-
-    country_code_file = r'g:\temp\country-codes.csv'
+    model_base = os.path.expanduser('~/models/speciesnet')
+    geofencing_file = os.path.join(model_base,'crop','geofence_release.2025.02.27.0702.json')
+    country_code_file = os.path.join(model_base,'country-codes.csv')
     # encoding = 'cp1252'; taxonomy_file = r'g:\temp\taxonomy_mapping-' + encoding + '.json'
-    encoding = None; taxonomy_file = r'g:\temp\taxonomy_mapping.json'
+    encoding = None; taxonomy_file = os.path.join(model_base,'taxonomy_mapping.json')
 
     initialize_geofencing(geofencing_file, country_code_file, force_init=True)
     initialize_taxonomy_info(taxonomy_file, force_init=True, encoding=encoding)
-
+
 
     #%% Test driver for geofence_fixes.csv function
 
@@ -2458,21 +2746,26 @@ if False:
     species = 'dingo'
     species_string = _species_string_to_canonical_species_string(species)
     rows = _generate_csv_rows_to_block_all_countries_except(species_string,block_except_list)
-
-    import clipboard; clipboard.copy('\n'.join(rows))
+
+    # import clipboard; clipboard.copy('\n'.join(rows))
+    print(rows)
 
 
     #%%
 
-    generate_csv_rows_for_species(species_string=species_string,
-                                  allow_countries=None,
+    taxon_name = 'hippopotamus amphibius'
+    taxonomy_info = binomial_name_to_taxonomy_info[taxon_name]
+    taxonomy_string_short = taxonomy_info_to_taxonomy_string(taxonomy_info)
+    assert len(taxonomy_string_short.split(';')) == 5
+
+    generate_csv_rows_for_species(species_string=taxonomy_string_short,
+                                  allow_countries=['COL'],
                                   block_countries=None,
                                   allow_states=None,
                                   block_states=None,
                                   blockexcept_countries=None)
-
-
-    _generate_csv_rows_to_block_all_countries_except(species_string,'AUS')
+
+    # _generate_csv_rows_to_block_all_countries_except(species_string,'AUS')
 
 
     #%% Test the effects of geofence changes
@@ -2482,51 +2775,18 @@ if False:
     species_allowed_in_country(species,country,state=None,return_status=False)
 
 
-    #%% instances.json generation test
+    #%% Geofencing lookups
 
-    from megadetector.utils.wi_utils import generate_instances_json_from_folder # noqa
-
-    instances_file = r'g:\temp\water-hole\instances.json'
-
-    _ = generate_instances_json_from_folder(folder=r'g:\temp\water-hole',
-                                            country='NAM',
-                                            lat=None,
-                                            lon=None,
-                                            output_file=instances_file,
-                                            filename_replacements={'g:/temp':'/mnt/g/temp'})
-
-    # from megadetector.utils.path_utils import open_file; open_file(instances_file)
-
-
-    #%% MD --> prediction conversion test
+    # This can be a latin or common name
+    species = 'hippopotamidae'
+    # print(common_name_to_taxonomy_info[species])
 
-    from megadetector.utils.wi_utils import generate_predictions_json_from_md_results # noqa
-    md_results_file = r'G:\temp\md-test-images\mdv5a.relpaths.json'
-    predictions_json_file = r'\\wsl$\Ubuntu\home\dmorris\tmp\speciesnet-tests\mdv5a.abspaths.predictions-format.json'
-    generate_predictions_json_from_md_results(md_results_file,predictions_json_file,base_folder=
-                                              '/home/dmorris/tmp/md-test-images/')
-
-    from megadetector.utils.wi_utils import generate_predictions_json_from_md_results # noqa
-    md_results_file = r"G:\temp\water-hole\md_results.json"
-    predictions_json_file = r"G:\temp\water-hole\md_results-prediction_format.json"
-    generate_predictions_json_from_md_results(md_results_file,predictions_json_file,base_folder=
-                                              '/mnt/g/temp/water-hole')
-
-
-    #%% Geofencing tests
-
-    species = 'didelphis marsupialis'
-    print(binomial_name_to_taxonomy_info[species])
-    country = 'Guatemala'
-    assert species_allowed_in_country(species, country)
-
-    species = 'virginia opossum'
-    print(common_name_to_taxonomy_info[species])
+    # This can be a name or country code
     country = 'USA'
-    assert species_allowed_in_country(species, country)
+    print(species_allowed_in_country(species, country))
 
 
-    #%% Test several species
+    #%% Bulk geofence lookups
 
     if True:
 
@@ -2606,86 +2866,3 @@ if False:
         if state is not None:
             state_string = ' ({})'.format(state)
         print('{} ({}) for {}{}: {}'.format(taxonomy_info['common_name'],species,country,state_string,allowed))
-
-
-    #%% Test conversion from predictons.json to MD format
-
-    import os # noqa
-    from megadetector.utils.wi_utils import generate_md_results_from_predictions_json # noqa
-
-    # detector_source = 'speciesnet'
-    detector_source = 'md'
-
-    if False:
-        image_folder = r'g:\temp\md-test-images'
-        base_folder = '/home/dmorris/tmp/md-test-images/'
-        if detector_source == 'speciesnet':
-            predictions_json_file = r"\\wsl$\Ubuntu\home\dmorris\tmp\speciesnet-tests\ensemble-output.json"
-            md_results_file = r"\\wsl$\Ubuntu\home\dmorris\tmp\speciesnet-tests\ensemble-output-md-format.json"
-        else:
-            assert detector_source == 'md'
-            predictions_json_file = r"\\wsl$\Ubuntu\home\dmorris\tmp\speciesnet-tests\ensemble-output-from-md-results.json"
-            md_results_file = r"\\wsl$\Ubuntu\home\dmorris\tmp\speciesnet-tests\ensemble-output-md-format-from-md-results.json"
-    else:
-        image_folder = r'g:\temp\water-hole'
-        base_folder = '/mnt/g/temp/water-hole/'
-        if detector_source == 'speciesnet':
-            predictions_json_file = r'g:\temp\water-hole\ensemble-output.json'
-            md_results_file = r'g:\temp\water-hole\ensemble-output.md_format.json'
-        else:
-            assert detector_source == 'md'
-            predictions_json_file = r'g:\temp\water-hole\ensemble-output-md.json'
-            md_results_file = r'g:\temp\water-hole\ensemble-output-md.md_format.json'
-
-    generate_md_results_from_predictions_json(predictions_json_file=predictions_json_file,
-                                              md_results_file=md_results_file,
-                                              base_folder=base_folder)
-
-    # from megadetector.utils.path_utils import open_file; open_file(md_results_file)
-
-    assert os.path.isdir(image_folder)
-
-
-    #%% Preview
-
-    from megadetector.postprocessing.postprocess_batch_results import \
-        PostProcessingOptions, process_batch_results
-    from megadetector.utils import path_utils
-
-    render_animals_only = False
-
-    options = PostProcessingOptions()
-    options.image_base_dir = image_folder
-    options.include_almost_detections = True
-    options.num_images_to_sample = None
-    options.confidence_threshold = 0.2
-    options.almost_detection_confidence_threshold = options.confidence_threshold - 0.05
-    options.ground_truth_json_file = None
-    options.separate_detections_by_category = True
-    options.sample_seed = 0
-    options.max_figures_per_html_file = 5000
-
-    options.parallelize_rendering = True
-    options.parallelize_rendering_n_cores = 10
-    options.parallelize_rendering_with_threads = True
-    options.sort_classification_results_by_count = True
-
-    if render_animals_only:
-        # Omit some pages from the output, useful when animals are rare
-        options.rendering_bypass_sets = ['detections_person','detections_vehicle',
-                                         'detections_person_vehicle','non_detections']
-
-    output_base = r'g:\temp\preview' + '_' + detector_source
-    if render_animals_only:
-        output_base = output_base + '_render_animals_only'
-    os.makedirs(output_base, exist_ok=True)
-
-    print('Writing preview to {}'.format(output_base))
-
-    options.md_results_file = md_results_file
-    options.output_dir = output_base
-    ppresults = process_batch_results(options)
-    html_output_file = ppresults.output_html_file
-
-    path_utils.open_file(html_output_file,attempt_to_open_in_wsl_host=True,browser_name='chrome')
-    # import clipboard; clipboard.copy(html_output_file)