megadetector 5.0.24__py3-none-any.whl → 5.0.25__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


@@ -27,8 +27,12 @@ from tqdm import tqdm
 
 from megadetector.utils.path_utils import insert_before_extension
 from megadetector.utils.ct_utils import split_list_into_n_chunks
+from megadetector.utils.ct_utils import round_floats_in_nested_dict
+from megadetector.utils.ct_utils import is_list_sorted
 from megadetector.utils.ct_utils import invert_dictionary
 from megadetector.utils.ct_utils import sort_list_of_dicts_by_key
+from megadetector.utils.ct_utils import sort_dictionary_by_value
+from megadetector.utils.ct_utils import sort_dictionary_by_key
 from megadetector.utils.path_utils import find_images
 from megadetector.postprocessing.validate_batch_results import \
     validate_batch_results, ValidateBatchResultsOptions
@@ -58,10 +62,28 @@ def is_valid_prediction_string(s):
     Returns:
         bool: True if this looks more or less like a WI prediction string
     """
-
+
+    # Note to self... don't get tempted to remove spaces here; spaces are used
+    # to indicate subspecies.
     return isinstance(s,str) and (len(s.split(';')) == 7) and (s == s.lower())
 
 
+def is_valid_taxonomy_string(s):
+    """
+    Determine whether [s] is a valid 5-token WI taxonomy string. Taxonomy strings look like:
+
+        'mammalia;rodentia;;;'
+        'mammalia;carnivora;canidae;canis;lupus dingo'
+
+    Args:
+        s (str): the string to be tested for validity
+
+    Returns:
+        bool: True if this looks more or less like a WI taxonomy string
+    """
+    return isinstance(s,str) and (len(s.split(';')) == 5) and (s == s.lower())
+
+
 def wi_result_to_prediction_string(r):
     """
     Convert the dict [r] - typically loaded from a row in a downloaded .csv file - to
@@ -469,10 +491,14 @@ sample_update_payload = {
 
 blank_prediction_string = 'f1856211-cfb7-4a5b-9158-c0f72fd09ee6;;;;;;blank'
 no_cv_result_prediction_string = 'f2efdae9-efb8-48fb-8a91-eccf79ab4ffb;no cv result;no cv result;no cv result;no cv result;no cv result;no cv result'
-rodent_prediction_string = '90d950db-2106-4bd9-a4c1-777604c3eada;mammalia;rodentia;;;;rodent'
-mammal_prediction_string = 'f2d233e3-80e3-433d-9687-e29ecc7a467a;mammalia;;;;;mammal'
 animal_prediction_string = '1f689929-883d-4dae-958c-3d57ab5b6c16;;;;;;animal'
 human_prediction_string = '990ae9dd-7a59-4344-afcb-1b7b21368000;mammalia;primates;hominidae;homo;sapiens;human'
+vehicle_prediction_string = 'e2895ed5-780b-48f6-8a11-9e27cb594511;;;;;;vehicle'
+
+non_taxonomic_prediction_strings = [blank_prediction_string,
+                                    no_cv_result_prediction_string,
+                                    animal_prediction_string,
+                                    vehicle_prediction_string]
 
 process_cv_response_url = 'https://placeholder'
 
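All of these constants use the 7-token prediction-string layout, GUID;class;order;family;genus;species;common name; the non-taxonomic entries simply leave the five taxonomic fields empty. A quick illustration (not from the package itself) of how prediction strings relate to the 5-token taxonomy strings checked by `is_valid_taxonomy_string`:

```python
# Illustration only; both example strings are defined in the diff above
human_prediction_string = \
    '990ae9dd-7a59-4344-afcb-1b7b21368000;mammalia;primates;hominidae;homo;sapiens;human'
vehicle_prediction_string = 'e2895ed5-780b-48f6-8a11-9e27cb594511;;;;;;vehicle'

# GUID;class;order;family;genus;species;common name
for s in [human_prediction_string, vehicle_prediction_string]:
    assert len(s.split(';')) == 7

# Dropping the GUID and common name yields the 5-token taxonomy string
taxonomy_string = ';'.join(human_prediction_string.split(';')[1:-1])
assert taxonomy_string == 'mammalia;primates;hominidae;homo;sapiens'
assert len(taxonomy_string.split(';')) == 5
```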
@@ -870,6 +896,7 @@ def get_kingdom(prediction_string):
         str: the kingdom field from the input string
     """
     tokens = prediction_string.split(';')
+    assert is_valid_prediction_string(prediction_string)
     return tokens[1]
 
 
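The new assert rejects malformed input before the field is returned; for example, with the human prediction string defined earlier in this file, the returned token (the one immediately after the GUID) is 'mammalia':

```python
# Illustration only, using the human prediction string defined in this file
s = '990ae9dd-7a59-4344-afcb-1b7b21368000;mammalia;primates;hominidae;homo;sapiens;human'
assert s.split(';')[1] == 'mammalia'
```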
@@ -912,7 +939,10 @@ def is_animal_classification(prediction_string):
     return True
 
 
-def generate_md_results_from_predictions_json(predictions_json_file,md_results_file,base_folder=None):
+def generate_md_results_from_predictions_json(predictions_json_file,
+                                              md_results_file,
+                                              base_folder=None,
+                                              max_decimals=5):
     """
     Generate an MD-formatted .json file from a predictions.json file. Typically,
     MD results files use relative paths, and predictions.json files use absolute paths, so
@@ -921,21 +951,38 @@ def generate_md_results_from_predictions_json(predictions_json_file,md_results_f
     Currently just applies the top classification category to every detection. If the top classification
     is "blank", writes an empty detection list.
 
-    wi_to_md.py is a command-line driver for this function.
+    speciesnet_to_md.py is a command-line driver for this function.
 
     Args:
-        predictions_json_file (str): path to a predictions.json file
+        predictions_json_file (str or dict): path to a predictions.json file, or a dict
         md_results_file (str): path to which we should write an MD-formatted .json file
-        base_folder (str, optional): leading string to remove from each path in the predictions.json file
+        base_folder (str, optional): leading string to remove from each path in the
+            predictions.json file
+        max_decimals (int, optional): number of decimal places to which we should round
+            all values
     """
 
     # Read predictions file
-    with open(predictions_json_file,'r') as f:
-        predictions = json.load(f)
+    if isinstance(predictions_json_file,str):
+        with open(predictions_json_file,'r') as f:
+            predictions = json.load(f)
+    else:
+        assert isinstance(predictions_json_file,dict)
+        predictions = predictions_json_file
+
+    # Round floating-point values (confidence scores, coordinates) to a
+    # reasonable number of decimal places
+    if max_decimals is not None and max_decimals > 0:
+        round_floats_in_nested_dict(predictions)
+
     predictions = predictions['predictions']
     assert isinstance(predictions,list)
 
-    from megadetector.utils.ct_utils import is_list_sorted
+    # Convert backslashes to forward slashes in both filenames and the base folder string
+    for im in predictions:
+        im['filepath'] = im['filepath'].replace('\\','/')
+    if base_folder is not None:
+        base_folder = base_folder.replace('\\','/')
 
     detection_category_id_to_name = {}
     classification_category_name_to_id = {}
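Two behaviors change here: the function now accepts an already-loaded dict in place of a path, and it rounds floating-point values before writing. A self-contained sketch of both (the `round_floats_in_nested_dict` stand-in below is an assumption about the `ct_utils` helper, not its actual implementation, and the tiny dict is schematic):

```python
import json

def load_predictions(predictions_json_file):
    # Mirrors the new input handling: accept a path or an already-loaded dict
    if isinstance(predictions_json_file, str):
        with open(predictions_json_file, 'r') as f:
            return json.load(f)
    assert isinstance(predictions_json_file, dict)
    return predictions_json_file

def round_floats_in_nested_dict(obj, max_decimals=5):
    # Hypothetical stand-in: recursively round every float in a nested
    # dict/list structure; the real helper's behavior may differ
    if isinstance(obj, float):
        return round(obj, max_decimals)
    if isinstance(obj, dict):
        for k in obj:
            obj[k] = round_floats_in_nested_dict(obj[k], max_decimals)
    elif isinstance(obj, list):
        for i, v in enumerate(obj):
            obj[i] = round_floats_in_nested_dict(v, max_decimals)
    return obj

d = load_predictions({'predictions': [{'filepath': 'cam01\\img001.jpg',
                                       'score': 0.123456789}]})
round_floats_in_nested_dict(d)
assert d['predictions'][0]['score'] == 0.12346
```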
@@ -948,6 +995,8 @@ def generate_md_results_from_predictions_json(predictions_json_file,md_results_f
     # Create the output images list
     images_out = []
 
+    base_folder_replacements = 0
+
     # im_in = predictions[0]
     for im_in in predictions:
 
@@ -957,6 +1006,7 @@ def generate_md_results_from_predictions_json(predictions_json_file,md_results_f
         fn = im_in['filepath']
         if base_folder is not None:
             if fn.startswith(base_folder):
+                base_folder_replacements += 1
                 fn = fn.replace(base_folder,'',1)
 
         im_out['file'] = fn
@@ -1056,6 +1106,11 @@ def generate_md_results_from_predictions_json(predictions_json_file,md_results_f
 
     # ...for each image
 
+    if base_folder is not None:
+        if base_folder_replacements == 0:
+            print('Warning: you supplied {} as the base folder, but I made zero replacements'.format(
+                base_folder))
+
     # Fix the 'unknown' category
 
     if len(all_unknown_detections) > 0:
@@ -1075,7 +1130,8 @@ def generate_md_results_from_predictions_json(predictions_json_file,md_results_f
 
     # Prepare friendly classification names
 
-    classification_category_descriptions = invert_dictionary(classification_category_name_to_id)
+    classification_category_descriptions = \
+        invert_dictionary(classification_category_name_to_id)
     classification_categories_out = {}
     for category_id in classification_category_descriptions.keys():
         category_name = classification_category_descriptions[category_id].split(';')[-1]
@@ -1105,7 +1161,9 @@ def generate_md_results_from_predictions_json(predictions_json_file,md_results_f
 # ...def generate_md_results_from_predictions_json(...)
 
 
-def generate_predictions_json_from_md_results(md_results_file,predictions_json_file,base_folder=None):
+def generate_predictions_json_from_md_results(md_results_file,
+                                              predictions_json_file,
+                                              base_folder=None):
     """
     Generate a predictions.json file from the MD-formatted .json file [md_results_file]. Typically,
     MD results files use relative paths, and predictions.json files use absolute paths, so
@@ -1165,13 +1223,16 @@ def generate_predictions_json_from_md_results(md_results_file,predictions_json_f
 
 # ...def generate_predictions_json_from_md_results(...)
 
+default_tokens_to_ignore = ['$RECYCLE.BIN']
 
 def generate_instances_json_from_folder(folder,
                                         country=None,
+                                        admin1_region=None,
                                         lat=None,
                                         lon=None,
                                         output_file=None,
-                                        filename_replacements=None):
+                                        filename_replacements=None,
+                                        tokens_to_ignore=default_tokens_to_ignore):
     """
     Generate an instances.json record that contains all images in [folder], optionally
     including location information, in a format suitable for run_model.py. Optionally writes
@@ -1186,6 +1247,8 @@ def generate_instances_json_from_folder(folder,
         filename_replacements (dict, optional): str --> str dict indicating filename substrings
             that should be replaced with other strings. Replacement occurs *after* converting
             backslashes to forward slashes.
+        tokens_to_ignore (list, optional): ignore any images with these tokens in their
+            names, typically used to avoid $RECYCLE.BIN. Can be None.
 
     Returns:
         dict: dict with at least the field "instances"
@@ -1195,6 +1258,13 @@ def generate_instances_json_from_folder(folder,
 
     image_files_abs = find_images(folder,recursive=True,return_relative_paths=False)
 
+    if tokens_to_ignore is not None:
+        n_images_before_ignore_tokens = len(image_files_abs)
+        for token in tokens_to_ignore:
+            image_files_abs = [fn for fn in image_files_abs if token not in fn]
+        print('After ignoring {} tokens, kept {} of {} images'.format(
+            len(tokens_to_ignore),len(image_files_abs),n_images_before_ignore_tokens))
+
     instances = []
 
     # image_fn_abs = image_files_abs[0]
@@ -1206,6 +1276,8 @@ def generate_instances_json_from_folder(folder,
                 instance['filepath'] = instance['filepath'].replace(s,filename_replacements[s])
         if country is not None:
             instance['country'] = country
+        if admin1_region is not None:
+            instance['admin1_region'] = admin1_region
         if lat is not None:
            assert lon is not None, 'Latitude provided without longitude'
            instance['latitude'] = lat
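For reference, the new parameters compose like the following hypothetical call (the folder and output paths are invented, and 'CA' is an example admin1_region code, i.e. a US state):

```python
# Hypothetical usage sketch; paths are invented
from megadetector.utils.wi_utils import generate_instances_json_from_folder

instances = generate_instances_json_from_folder(
    folder='/data/camera-traps/2024-08',
    country='USA',
    admin1_region='CA',
    output_file='instances.json',
    tokens_to_ignore=['$RECYCLE.BIN'])

print('Generated {} instances'.format(len(instances['instances'])))
```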
@@ -1226,14 +1298,243 @@ def generate_instances_json_from_folder(folder,
 # ...def generate_instances_json_from_folder(...)
 
 
-#%% Functions related to geofencing and taxonomy mapping
+def split_instances_into_n_batches(instances_json,n_batches,output_files=None):
+    """
+    Given an instances.json file, split it into batches of equal size.
+
+    Args:
+        instances_json (str): input .json file in instances.json format
+        n_batches (int): number of new files to generate
+        output_files (list, optional): output .json files for each
+            batch. If supplied, should have length [n_batches]. If not
+            supplied, filenames will be generated based on [instances_json].
+
+    Returns:
+        list: list of output files that were written; identical to [output_files]
+        if it was supplied as input.
+    """
+
+    with open(instances_json,'r') as f:
+        instances = json.load(f)
+    assert isinstance(instances,dict) and 'instances' in instances
+    instances = instances['instances']
+
+    if output_files is not None:
+        assert len(output_files) == n_batches, \
+            'Expected {} output files, received {}'.format(
+                n_batches,len(output_files))
+    else:
+        output_files = []
+        for i_batch in range(0,n_batches):
+            batch_string = 'batch_{}'.format(str(i_batch).zfill(3))
+            output_files.append(insert_before_extension(instances_json,batch_string))
+
+    batches = split_list_into_n_chunks(instances, n_batches)
+
+    for i_batch,batch in enumerate(batches):
+        batch_dict = {'instances':batch}
+        with open(output_files[i_batch],'w') as f:
+            json.dump(batch_dict,f,indent=1)
+
+    print('Wrote {} batches to file'.format(n_batches))
+
+    return output_files
+
+
+def merge_prediction_json_files(input_prediction_files,output_prediction_file):
+    """
+    Merge all predictions.json files in [input_prediction_files] into a single .json file.
+
+    Args:
+        input_prediction_files (list): list of predictions.json files to merge
+        output_prediction_file (str): output .json file
+    """
+
+    predictions = []
+    image_filenames_processed = set()
+
+    # input_json_fn = input_prediction_files[0]
+    for input_json_fn in tqdm(input_prediction_files):
+
+        assert os.path.isfile(input_json_fn), \
+            'Could not find prediction file {}'.format(input_json_fn)
+        with open(input_json_fn,'r') as f:
+            results_this_file = json.load(f)
+        assert isinstance(results_this_file,dict)
+        predictions_this_file = results_this_file['predictions']
+        for prediction in predictions_this_file:
+            image_fn = prediction['filepath']
+            assert image_fn not in image_filenames_processed
+            image_filenames_processed.add(image_fn)
+        predictions.extend(predictions_this_file)
+
+    output_dict = {'predictions':predictions}
+
+    os.makedirs(os.path.dirname(output_prediction_file),exist_ok=True)
+    with open(output_prediction_file,'w') as f:
+        json.dump(output_dict,f,indent=1)
+
+# ...def merge_prediction_json_files(...)
+
+
+def validate_predictions_file(fn,instances=None,verbose=True):
+    """
+    Validate the predictions.json file [fn].
+
+    Args:
+        fn (str): a .json file in predictions.json (SpeciesNet) format
+        instances (str or list, optional): a folder, instances.json file,
+            or dict loaded from an instances.json file. If supplied, this
+            function will verify that [fn] contains the same number of
+            images as [instances].
+        verbose (bool, optional): enable additional debug output
+
+    Returns:
+        dict: the contents of [fn]
+    """
+
+    with open(fn,'r') as f:
+        d = json.load(f)
+    predictions = d['predictions']
+
+    failures = []
+
+    for im in predictions:
+        if 'failures' in im:
+            failures.append(im)
+
+    if verbose:
+        print('Read detector results for {} images, with {} failure(s)'.format(
+            len(d['predictions']),len(failures)))
+
+    if instances is not None:
+
+        if isinstance(instances,str):
+            if os.path.isdir(instances):
+                instances = generate_instances_json_from_folder(folder=instances)
+            elif os.path.isfile(instances):
+                with open(instances,'r') as f:
+                    instances = json.load(f)
+            else:
+                raise ValueError('Could not find instances file/folder {}'.format(
+                    instances))
+        assert isinstance(instances,dict)
+        assert 'instances' in instances
+        instances = instances['instances']
+        if verbose:
+            print('Expected results for {} files'.format(len(instances)))
+        assert len(instances) == len(predictions), \
+            '{} instances expected, {} found'.format(
+                len(instances),len(predictions))
+
+        expected_files = set([instance['filepath'] for instance in instances])
+        found_files = set([prediction['filepath'] for prediction in predictions])
+        assert expected_files == found_files
+
+    # ...if a list of instances was supplied
+
+    return d
+
+# ...def validate_predictions_file(...)
+
+
+def find_geofence_adjustments(ensemble_json_file,use_latin_names=False):
+    """
+    Count the number of instances of each unique change made by the geofence.
+
+    Args:
+        ensemble_json_file (str): SpeciesNet-formatted .json file produced
+            by the full ensemble.
+        use_latin_names (bool, optional): return a mapping using binomial names
+            rather than common names.
+
+    Returns:
+        dict: maps strings that look like "puma,felidae family" to integers,
+        where that entry would indicate the number of times that "puma" was
+        predicted, but mapped to family level by the geofence. Sorted in
+        descending order by count.
+    """
+
+    ensemble_results = validate_predictions_file(ensemble_json_file)
+
+    assert isinstance(ensemble_results,dict)
+    predictions = ensemble_results['predictions']
+
+    # Maps comma-separated pairs of common names (or binomial names) to
+    # the number of times that transition (first --> second) happened
+    rollup_pair_to_count = defaultdict(int)
+
+    # prediction = predictions[0]
+    for prediction in tqdm(predictions):
+
+        if 'failures' in prediction and \
+           prediction['failures'] is not None and \
+           len(prediction['failures']) > 0:
+            continue
+
+        assert 'prediction_source' in prediction, \
+            'Prediction present without [prediction_source] field, are you sure this ' + \
+            'is an ensemble output file?'
+
+        if 'geofence' in prediction['prediction_source']:
+
+            classification_taxonomy_string = \
+                prediction['classifications']['classes'][0]
+            prediction_taxonomy_string = prediction['prediction']
+            assert is_valid_prediction_string(classification_taxonomy_string)
+            assert is_valid_prediction_string(prediction_taxonomy_string)
+
+            # Typical examples:
+            # '86f5b978-4f30-40cc-bd08-be9e3fba27a0;mammalia;rodentia;sciuridae;sciurus;carolinensis;eastern gray squirrel'
+            # 'e4d1e892-0e4b-475a-a8ac-b5c3502e0d55;mammalia;rodentia;sciuridae;;;sciuridae family'
+            classification_common_name = classification_taxonomy_string.split(';')[-1]
+            prediction_common_name = prediction_taxonomy_string.split(';')[-1]
+            classification_binomial_name = classification_taxonomy_string.split(';')[-2]
+            prediction_binomial_name = prediction_taxonomy_string.split(';')[-2]
+
+            input_name = classification_binomial_name if use_latin_names else \
+                classification_common_name
+            output_name = prediction_binomial_name if use_latin_names else \
+                prediction_common_name
+
+            rollup_pair = input_name.strip() + ',' + output_name.strip()
+            rollup_pair_to_count[rollup_pair] += 1
+
+        # ...if we made a geofencing change
+
+    # ...for each prediction
+
+    rollup_pair_to_count = sort_dictionary_by_value(rollup_pair_to_count,reverse=True)
+
+    return rollup_pair_to_count
+
+# ...def find_geofence_adjustments(...)
+
+
+#%% Module-level globals related to taxonomy mapping and geofencing
 
 # This maps a taxonomy string (e.g. mammalia;cetartiodactyla;cervidae;odocoileus;virginianus) to
 # a dict with keys taxon_id, common_name, kingdom, phylum, class, order, family, genus, species
 taxonomy_string_to_taxonomy_info = None
+
+# Maps a binomial name (possibly three tokens, if it's a subspecies) to the same dict
+# described above.
 binomial_name_to_taxonomy_info = None
+
+# Maps a common name to the same dict described above
 common_name_to_taxonomy_info = None
 
+# Dict mapping 5-token semicolon-delimited taxonomy strings to geofencing rules
+taxonomy_string_to_geofencing_rules = None
+
+# Maps lower-case country names to upper-case country codes
+country_to_country_code = None
+
+# Maps upper-case country codes to lower-case country names
+country_code_to_country = None
+
+
+#%% Functions related to geofencing and taxonomy mapping
+
 def taxonomy_info_to_taxonomy_string(taxonomy_info):
     """
     Convert a taxonomy record in dict format to a semicolon-delimited string
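Together with `validate_predictions_file`, the two helpers added above support a split/run/merge workflow: divide an instances.json file into chunks, run SpeciesNet on each chunk independently, then merge and sanity-check the results. A sketch under invented filenames (the model-invocation step itself is outside this module):

```python
# Hypothetical workflow sketch; all filenames are invented
from megadetector.utils.wi_utils import split_instances_into_n_batches
from megadetector.utils.wi_utils import merge_prediction_json_files
from megadetector.utils.wi_utils import validate_predictions_file

batch_files = split_instances_into_n_batches('instances.json', n_batches=4)

# ...run SpeciesNet separately on each batch, producing one predictions
# file per batch (e.g. predictions_batch_000.json)...
prediction_files = ['predictions_batch_{}.json'.format(str(i).zfill(3))
                    for i in range(4)]

merge_prediction_json_files(prediction_files, 'merged/predictions.json')

# Confirm that the merged file covers exactly the original instances
validate_predictions_file('merged/predictions.json', instances='instances.json')
```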
@@ -1258,12 +1559,16 @@ def initialize_taxonomy_info(taxonomy_file,force_init=False,encoding='cp1252'):
     [common_name_to_taxonomy_info].
 
     Args:
-        taxonomy_file (str): .json file containing WI taxonomy information
+        taxonomy_file (str): .json file containing mappings from the short taxonomy strings
+            to the longer strings with GUID and common name, see example below.
         force_init (bool, optional): if the output dicts already exist, should we
             re-initialize anyway?
         encoding (str, optional): character encoding to use when opening the .json file
     """
 
+    if encoding is None:
+        encoding = 'cp1252'
+
     global taxonomy_string_to_taxonomy_info
     global binomial_name_to_taxonomy_info
     global common_name_to_taxonomy_info
@@ -1326,22 +1631,159 @@ def initialize_taxonomy_info(taxonomy_file,force_init=False,encoding='cp1252'):
                 # print('Warning: no binomial name for {}'.format(taxonomy_string))
                 pass
             else:
+                # strip(), but don't remove spaces from the species name;
+                # subspecies are separated with a space, e.g. canis;lupus dingo
                 binomial_name = tokens[4].strip() + ' ' + tokens[5].strip()
                 binomial_name_to_taxonomy_info[binomial_name] = taxon_info
+
+    print('Created {} records in taxonomy_string_to_taxonomy_info'.format(
+        len(taxonomy_string_to_taxonomy_info)))
 
 # ...def initialize_taxonomy_info(...)
 
 
-#%% Geofencing functions
-
-# Dict mapping semicolon-delimited taxonomy strings to geofencing rules
-taxonomy_string_to_geofencing_rules = None
+def _parse_code_list(codes):
+    """
+    Turn a list of country or state codes in string, delimited string, or list format
+    into a list. Also does basic validity checking.
+    """
+
+    if not isinstance(codes,list):
+
+        assert isinstance(codes,str)
+
+        codes = codes.strip()
+
+        # This is just a single code
+        if ',' not in codes:
+            codes = [codes]
+        else:
+            codes = codes.split(',')
+            codes = [c.strip() for c in codes]
+
+    assert isinstance(codes,list)
+
+    codes = [c.upper().strip() for c in codes]
+
+    for c in codes:
+        assert len(c) in (2,3)
+
+    return codes
+
+
+def _generate_csv_rows_to_block_all_countries_except(
+        species_string,
+        block_except_list):
+    """
+    Generate rows in the format expected by geofence_fixes.csv, representing a list of
+    allow and block rules to block all countries currently allowed for this species
+    except [block_except_list], and add allow rules for those countries.
+    """
+
+    assert is_valid_taxonomy_string(species_string), \
+        '{} is not a valid taxonomy string'.format(species_string)
+
+    global taxonomy_string_to_taxonomy_info
+    global binomial_name_to_taxonomy_info
+    global common_name_to_taxonomy_info
+
+    assert taxonomy_string_to_geofencing_rules is not None, \
+        'Initialize geofencing prior to species lookup'
+    assert taxonomy_string_to_taxonomy_info is not None, \
+        'Initialize taxonomy lookup prior to species lookup'
+
+    geofencing_rules_this_species = \
+        taxonomy_string_to_geofencing_rules[species_string]
+
+    allowed_countries = []
+    if 'allow' in geofencing_rules_this_species:
+        allowed_countries.extend(geofencing_rules_this_species['allow'])
+
+    blocked_countries = []
+    if 'block' in geofencing_rules_this_species:
+        blocked_countries.extend(geofencing_rules_this_species['block'])
+
+    block_except_list = _parse_code_list(block_except_list)
+
+    countries_to_block = []
+    countries_to_allow = []
+
+    # country = allowed_countries[0]
+    for country in allowed_countries:
+        if country not in block_except_list and country not in blocked_countries:
+            countries_to_block.append(country)
+
+    for country in block_except_list:
+        if country in blocked_countries:
+            raise ValueError("I can't allow a country that has already been blocked")
+        if country not in allowed_countries:
+            countries_to_allow.append(country)
+
+    rows = generate_csv_rows_for_species(species_string,
+                                         allow_countries=countries_to_allow,
+                                         block_countries=countries_to_block)
+
+    return rows
+
+# ...def _generate_csv_rows_to_block_all_countries_except(...)
+
+
+def generate_csv_rows_for_species(species_string,
+                                  allow_countries=None,
+                                  block_countries=None,
+                                  allow_states=None,
+                                  block_states=None,
+                                  blockexcept_countries=None):
+    """
+    Generate rows in the format expected by geofence_fixes.csv, representing a list of
+    allow and/or block rules for the specified species and countries/states. Does not check
+    that the rules make sense; e.g. nothing will stop you in this function from both allowing
+    and blocking a country.
+
+    Args:
+        species_string (str): string in semicolon-delimited WI taxonomy format
+        allow_countries (optional, list or str): a three-letter country code, list of
+            country codes, or comma-separated list of country codes to allow
+        block_countries (optional, list or str): a three-letter country code, list of
+            country codes, or comma-separated list of country codes to block
+        allow_states (optional, list or str): a two-letter state code, list of
+            state codes, or comma-separated list of state codes to allow
+        block_states (optional, list or str): a two-letter state code, list of
+            state codes, or comma-separated list of state codes to block
+
+    Returns:
+        list of str: lines ready to be pasted into geofence_fixes.csv
+    """
+
+    assert is_valid_taxonomy_string(species_string), \
+        '{} is not a valid taxonomy string'.format(species_string)
+
+    lines = []
+
+    if allow_countries is not None:
+        allow_countries = _parse_code_list(allow_countries)
+        for country in allow_countries:
+            lines.append(species_string + ',allow,' + country + ',')
+
+    if block_countries is not None:
+        block_countries = _parse_code_list(block_countries)
+        for country in block_countries:
+            lines.append(species_string + ',block,' + country + ',')
+
+    if allow_states is not None:
+        allow_states = _parse_code_list(allow_states)
+        for state in allow_states:
+            lines.append(species_string + ',allow,USA,' + state)
+
+    if block_states is not None:
+        block_states = _parse_code_list(block_states)
+        for state in block_states:
+            lines.append(species_string + ',block,USA,' + state)
+
+    return lines
 
-# Maps lower-case country names to upper-case country codes
-country_to_country_code = None
+# ...def generate_csv_rows_for_species(...)
 
-# Maps upper-case country codes to lower-case country names
-country_code_to_country = None
 
 def initialize_geofencing(geofencing_file,country_code_file,force_init=False):
     """
@@ -1351,10 +1793,13 @@ def initialize_geofencing(geofencing_file,country_code_file,force_init=False):
 
     Args:
         geofencing_file (str): .json file with geofencing rules
-        country_code_file (str): .csv file with country code mappings
+        country_code_file (str): .csv file with country code mappings, in columns
+            called "name" and "alpha-3", e.g. from
+            https://github.com/lukes/ISO-3166-Countries-with-Regional-Codes/blob/master/all/all.csv
         force_init (bool, optional): if the output dicts already exist, should we
             re-initialize anyway?
     """
+
     global taxonomy_string_to_geofencing_rules
     global country_to_country_code
     global country_code_to_country
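To make the geofence_fixes.csv format concrete: each row emitted by `generate_csv_rows_for_species` (added above) is species_string,allow|block,country[,state]. A sketch using the cervid taxonomy string that appears elsewhere in this file; the country and state choices are illustrative:

```python
# Illustration only; expected output shown in the comments
from megadetector.utils.wi_utils import generate_csv_rows_for_species

species_string = 'mammalia;cetartiodactyla;cervidae;odocoileus;virginianus'
rows = generate_csv_rows_for_species(species_string,
                                     allow_countries='USA, CAN',
                                     block_states='HI')

# rows is now:
# ['mammalia;cetartiodactyla;cervidae;odocoileus;virginianus,allow,USA,',
#  'mammalia;cetartiodactyla;cervidae;odocoileus;virginianus,allow,CAN,',
#  'mammalia;cetartiodactyla;cervidae;odocoileus;virginianus,block,USA,HI']
```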
@@ -1427,35 +1872,21 @@ def initialize_geofencing(geofencing_file,country_code_file,force_init=False):
 # ...def initialize_geofencing(...)
 
 
-def species_allowed_in_country(species,country,state=None,return_status=False):
+def _species_string_to_canonical_species_string(species):
+    """
+    Convert a string that may be a 5-token species string, a binomial name,
+    or a common name into a 5-token species string.
     """
-    Determines whether [species] is allowed in [country], according to
-    already-initialized geofencing rules.
 
-    Args:
-        species (str): can be a common name, a binomial name, or a species string
-        country (str): country name or three-letter code
-        state (str, optional): two-letter US state code
-        return_status (bool, optional): by default, this function returns a bool;
-            if you want to know *why* [species] is allowed/not allowed, settings
-            return_status to True will return additional information.
+    global taxonomy_string_to_taxonomy_info
+    global binomial_name_to_taxonomy_info
+    global common_name_to_taxonomy_info
 
-    Returns:
-        bool or str: typically returns True if [species] is allowed in [country], else
-        False. Returns a more detailed string if return_status is set.
-    """
-
     assert taxonomy_string_to_geofencing_rules is not None, \
         'Initialize geofencing prior to species lookup'
     assert taxonomy_string_to_taxonomy_info is not None, \
         'Initialize taxonomy lookup prior to species lookup'
-
-    # species = 'mammalia;cetartiodactyla;cervidae;odocoileus;virginianus'
-    # species = 'didelphis marsupialis'
-    # country = 'Guatemala'
-
-    # species = 'common opossum'
-
+
     species = species.lower()
 
     # Turn "species" into a taxonomy string
@@ -1463,8 +1894,8 @@ def species_allowed_in_country(species,country,state=None,return_status=False):
     # If this is already a taxonomy string...
     if len(species.split(';')) == 5:
         pass
-    # If this is a binomial name...
-    elif len(species.split(' ')) == 2 and (species in binomial_name_to_taxonomy_info):
+    # If this is a binomial name (which may include a subspecies)...
+    elif (len(species.split(' ')) in (2,3)) and (species in binomial_name_to_taxonomy_info):
         taxonomy_info = binomial_name_to_taxonomy_info[species]
         taxonomy_string = taxonomy_info_to_taxonomy_string(taxonomy_info)
     # If this is a common name...
@@ -1474,6 +1905,37 @@ def species_allowed_in_country(species,country,state=None,return_status=False):
     else:
         raise ValueError('Could not find taxonomic information for {}'.format(species))
 
+    return taxonomy_string
+
+
+def species_allowed_in_country(species,country,state=None,return_status=False):
+    """
+    Determines whether [species] is allowed in [country], according to
+    already-initialized geofencing rules.
+
+    Args:
+        species (str): can be a common name, a binomial name, or a species string
+        country (str): country name or three-letter code
+        state (str, optional): two-letter US state code
+        return_status (bool, optional): by default, this function returns a bool;
+            if you want to know *why* [species] is allowed/not allowed, setting
+            return_status to True will return additional information.
+
+    Returns:
+        bool or str: typically returns True if [species] is allowed in [country], else
+        False. Returns a more detailed string if return_status is set.
+    """
+
+    global taxonomy_string_to_taxonomy_info
+    global binomial_name_to_taxonomy_info
+    global common_name_to_taxonomy_info
+
+    assert taxonomy_string_to_geofencing_rules is not None, \
+        'Initialize geofencing prior to species lookup'
+    assert taxonomy_string_to_taxonomy_info is not None, \
+        'Initialize taxonomy lookup prior to species lookup'
+
+    taxonomy_string = _species_string_to_canonical_species_string(species)
 
     # Normalize [state]
 
@@ -1515,6 +1977,8 @@ def species_allowed_in_country(species,country,state=None,return_status=False):
         blocked_countries = list(geofencing_rules_this_species['block'])
 
     status = None
+
+    # The convention is that block rules win over allow rules
     if country_code in blocked_countries:
         status = 'blocked'
     elif country_code in allowed_countries:
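The new comment documents the precedence the surrounding branches implement; distilled to just that rule (the real function also normalizes country names and handles state-level rules):

```python
# Distilled sketch of the precedence logic; not the full implementation
def _resolve(country_code, allowed_countries, blocked_countries):
    if country_code in blocked_countries:
        return 'blocked'
    elif country_code in allowed_countries:
        return 'allowed'
    return None  # no applicable rule

# Block rules win over allow rules when both match
assert _resolve('USA', ['USA'], ['USA']) == 'blocked'
assert _resolve('CAN', ['CAN'], []) == 'allowed'
```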
@@ -1565,12 +2029,459 @@ def species_allowed_in_country(species,country,state=None,return_status=False):
 # ...def species_allowed_in_country(...)
 
 
+def restrict_to_taxa_list(taxa_list,
+                          speciesnet_taxonomy_file,
+                          input_file,
+                          output_file,
+                          allow_walk_down=False):
+    """
+    Given a prediction file in MD .json format, likely without having had
+    a geofence applied, apply a custom taxa list.
+
+    Args:
+        taxa_list (str or list): list of latin names, or a text file containing
+            a list of latin names. Optionally may contain a second (comma-delimited)
+            column containing common names, used only for debugging. Latin names
+            must exist in the SpeciesNet taxonomy.
+        speciesnet_taxonomy_file (str): taxonomy filename, in the same format used for model
+            release (with 7-token taxonomy entries)
+        input_file (str): MD-formatted .json file to which the taxa list should be applied
+        output_file (str): .json file to write, in MD format
+        allow_walk_down (bool, optional): should we walk down the taxonomy tree
+            when making mappings if a parent has only a single allowable child?
+            For example, if only a single felid species is allowed, should other
+            felid predictions be mapped to that species, as opposed to being mapped
+            to the family?
+    """
+
+    ##%% Read target taxa list
+
+    if isinstance(taxa_list,str):
+        assert os.path.isfile(taxa_list), \
+            'Could not find taxa list file {}'.format(taxa_list)
+        with open(taxa_list,'r') as f:
+            taxa_list = f.readlines()
+
+    taxa_list = [s.strip().lower() for s in taxa_list]
+    taxa_list = [s for s in taxa_list if len(s) > 0]
+
+    target_latin_to_common = {}
+    for s in taxa_list:
+        if s.strip().startswith('#'):
+            continue
+        tokens = s.split(',')
+        assert len(tokens) <= 2
+        binomial_name = tokens[0]
+        assert len(binomial_name.split(' ')) in (1,2,3), \
+            'Illegal binomial name in species list: {}'.format(binomial_name)
+        if len(tokens) > 1:
+            common_name = tokens[1].strip().lower()
+        else:
+            common_name = None
+        assert binomial_name not in target_latin_to_common
+        target_latin_to_common[binomial_name] = common_name
+
+
+    ##%% Read taxonomy file
+
+    with open(speciesnet_taxonomy_file,'r') as f:
+        speciesnet_taxonomy_list = f.readlines()
+    speciesnet_taxonomy_list = [s.strip() for s in \
+        speciesnet_taxonomy_list if len(s.strip()) > 0]
+
+    # Maps the latin name of every taxon to the corresponding full taxon string
+    #
+    # For species, the key is a binomial name
+    speciesnet_latin_name_to_taxon_string = {}
+    speciesnet_common_name_to_taxon_string = {}
+
+    def _insert_taxonomy_string(s):
+
+        tokens = s.split(';')
+        assert len(tokens) == 7
+
+        guid = tokens[0] # noqa
+        class_name = tokens[1]
+        order = tokens[2]
+        family = tokens[3]
+        genus = tokens[4]
+        species = tokens[5]
+        common_name = tokens[6]
+
+        if len(class_name) == 0:
+            assert common_name in ('animal','vehicle','blank')
+            return
+
+        if len(species) > 0:
+            assert all([len(s) > 0 for s in [genus,family,order]])
+            binomial_name = genus + ' ' + species
+            if binomial_name not in speciesnet_latin_name_to_taxon_string:
+                speciesnet_latin_name_to_taxon_string[binomial_name] = s
+        elif len(genus) > 0:
+            assert all([len(s) > 0 for s in [family,order]])
+            if genus not in speciesnet_latin_name_to_taxon_string:
+                speciesnet_latin_name_to_taxon_string[genus] = s
+        elif len(family) > 0:
+            assert len(order) > 0
+            if family not in speciesnet_latin_name_to_taxon_string:
+                speciesnet_latin_name_to_taxon_string[family] = s
+        elif len(order) > 0:
+            if order not in speciesnet_latin_name_to_taxon_string:
+                speciesnet_latin_name_to_taxon_string[order] = s
+        else:
+            if class_name not in speciesnet_latin_name_to_taxon_string:
+                speciesnet_latin_name_to_taxon_string[class_name] = s
+
+        if len(common_name) > 0:
+            if common_name not in speciesnet_common_name_to_taxon_string:
+                speciesnet_common_name_to_taxon_string[common_name] = s
+
+    for s in speciesnet_taxonomy_list:
+
+        _insert_taxonomy_string(s)
+
+
+    ##%% Make sure all parent taxa are represented in the taxonomy
+
+    # In theory any taxon that appears as the parent of another taxon should
+    # also be in the taxonomy, but this isn't always true, so we fix it here.
+
+    new_taxon_string_to_missing_tokens = defaultdict(list)
+
+    # latin_name = next(iter(speciesnet_latin_name_to_taxon_string.keys()))
+    for latin_name in speciesnet_latin_name_to_taxon_string.keys():
+
+        if 'no cv result' in latin_name:
+            continue
+
+        taxon_string = speciesnet_latin_name_to_taxon_string[latin_name]
+        tokens = taxon_string.split(';')
+
+        # Don't process GUID, species, or common name
+        # i_token = 6
+        for i_token in range(1,len(tokens)-2):
+
+            test_token = tokens[i_token]
+            if len(test_token) == 0:
+                continue
+
+            # Do we need to make up a taxon for this token?
+            if test_token not in speciesnet_latin_name_to_taxon_string:
+
+                new_tokens = [''] * 7
+                new_tokens[0] = 'fake_guid'
+                for i_copy_token in range(1,i_token+1):
+                    new_tokens[i_copy_token] = tokens[i_copy_token]
+                new_tokens[-1] = test_token + ' species'
+                assert new_tokens[-2] == ''
+                new_taxon_string = ';'.join(new_tokens)
+                # assert new_taxon_string not in new_taxon_strings
+                new_taxon_string_to_missing_tokens[new_taxon_string].append(test_token)
+
+        # ...for each token
+
+    # ...for each taxon
+
+    print('Found {} taxa that need to be inserted to make the taxonomy valid:\n'.format(
+        len(new_taxon_string_to_missing_tokens)))
+
+    new_taxon_string_to_missing_tokens = \
+        sort_dictionary_by_key(new_taxon_string_to_missing_tokens)
+    for taxon_string in new_taxon_string_to_missing_tokens:
+        missing_taxa = ','.join(new_taxon_string_to_missing_tokens[taxon_string])
+        print('{} ({})'.format(taxon_string,missing_taxa))
+
+    for new_taxon_string in new_taxon_string_to_missing_tokens:
+        _insert_taxonomy_string(new_taxon_string)
+
+
+    ##%% Make sure all species on the allow-list are in the taxonomy
+
+    n_failed_mappings = 0
+
+    for target_taxon_latin_name in target_latin_to_common.keys():
+        if target_taxon_latin_name not in speciesnet_latin_name_to_taxon_string:
+            common_name = target_latin_to_common[target_taxon_latin_name]
+            s = '{} ({}) not in speciesnet taxonomy'.format(
+                target_taxon_latin_name,common_name)
+            if common_name in speciesnet_common_name_to_taxon_string:
+                s += ' (common name maps to {})'.format(
+                    speciesnet_common_name_to_taxon_string[common_name])
+            print(s)
+            n_failed_mappings += 1
+
+    if n_failed_mappings > 0:
+        raise ValueError('Cannot continue with geofence generation')
+
+
+    ##%% For the allow-list, map each parent taxon to a set of allowable child taxa
+
+    # Maps parent names to all allowed child names, or None if this is the
+    # lowest-level allowable taxon on this path
+    allowed_parent_taxon_to_child_taxa = defaultdict(set)
+
+    # latin_name = next(iter(target_latin_to_common.keys()))
+    for latin_name in target_latin_to_common:
+
+        taxon_string = speciesnet_latin_name_to_taxon_string[latin_name]
+        tokens = taxon_string.split(';')
+        assert len(tokens) == 7
+
+        # Remove GUID and common name
+        #
+        # This is now always class/order/family/genus/species
+        tokens = tokens[1:-1]
+
+        child_taxon = None
+
+        # If this is a species
+        if len(tokens[-1]) > 0:
+            binomial_name = tokens[-2] + ' ' + tokens[-1]
+            assert binomial_name == latin_name
+            allowed_parent_taxon_to_child_taxa[binomial_name].add(None)
+            child_taxon = binomial_name
+
+        # The first candidate parent is the genus
+        parent_token_index = len(tokens) - 2
+
+        while(parent_token_index >= 0):
+
+            parent_taxon = tokens[parent_token_index]
+            allowed_parent_taxon_to_child_taxa[parent_taxon].add(child_taxon)
+            child_taxon = parent_taxon
+            parent_token_index -= 1
+
+    # ...for each allowed latin name
+
+    allowed_parent_taxon_to_child_taxa = \
+        sort_dictionary_by_key(allowed_parent_taxon_to_child_taxa)
+
+
+    ##%% Map all predictions that exist in this dataset...
+
+    # ...to the prediction we should generate.
+
+    with open(input_file,'r') as f:
+        input_data = json.load(f)
+
+    input_category_id_to_common_name = input_data['classification_categories'] #noqa
+    input_category_id_to_taxonomy_string = \
+        input_data['classification_category_descriptions']
+
+    input_category_id_to_output_taxon_string = {}
+
+    # input_category_id = next(iter(input_category_id_to_taxonomy_string.keys()))
+    for input_category_id in input_category_id_to_taxonomy_string.keys():
+
+        input_taxon_string = input_category_id_to_taxonomy_string[input_category_id]
+        input_taxon_tokens = input_taxon_string.split(';')
+        assert len(input_taxon_tokens) == 7
+
+        # Don't mess with blank/no-cv-result/animal/human
+        if (input_taxon_string in non_taxonomic_prediction_strings) or \
+           (input_taxon_string == human_prediction_string):
+            input_category_id_to_output_taxon_string[input_category_id] = \
+                input_taxon_string
+            continue
+
+        # Remove GUID and common name
+        #
+        # This is now always class/order/family/genus/species
+        input_taxon_tokens = input_taxon_tokens[1:-1]
+
+        test_index = len(input_taxon_tokens) - 1
+        target_taxon = None
+
+        # Start at the species level, and see whether each taxon is allowed
+        while((test_index >= 0) and (target_taxon is None)):
+
+            # Species are represented as binomial names
+            if (test_index == (len(input_taxon_tokens) - 1)) and \
+               (len(input_taxon_tokens[-1]) > 0):
+                test_taxon_name = \
+                    input_taxon_tokens[-2] + ' ' + input_taxon_tokens[-1]
+            else:
+                test_taxon_name = input_taxon_tokens[test_index]
+
+            # If we haven't yet found the level at which this taxon is non-empty,
+            # keep going up
+            if len(test_taxon_name) == 0:
+                test_index -= 1
+                continue
+
+            assert test_taxon_name in speciesnet_latin_name_to_taxon_string
+
+            # Is this taxon allowed according to the custom species list?
+            if test_taxon_name in allowed_parent_taxon_to_child_taxa:
+
+                allowed_child_taxa = allowed_parent_taxon_to_child_taxa[test_taxon_name]
+                assert allowed_child_taxa is not None
+
+                # If this is the lowest-level allowable token or there is not a
+                # unique child, don't walk any further, even if walking down
+                # is enabled.
+                if (None in allowed_child_taxa):
+                    assert len(allowed_child_taxa) == 1
+
+                if (None in allowed_child_taxa) or (len(allowed_child_taxa) > 1):
+                    target_taxon = test_taxon_name
+                elif not allow_walk_down:
+                    target_taxon = test_taxon_name
+                else:
+                    # If there's a unique child, walk back *down* the allowable
+                    # taxa until we run out of unique children
+                    while ((next(iter(allowed_child_taxa)) is not None) and \
+                           (len(allowed_child_taxa) == 1)):
+                        candidate_taxon = next(iter(allowed_child_taxa))
+                        assert candidate_taxon in allowed_parent_taxon_to_child_taxa
+                        assert candidate_taxon in speciesnet_latin_name_to_taxon_string
+                        allowed_child_taxa = \
+                            allowed_parent_taxon_to_child_taxa[candidate_taxon]
+                        target_taxon = candidate_taxon
+
+            # ...if this is an allowed taxon
+
+            test_index -= 1
+
+        # ...for each token
+
+        if target_taxon is None:
+            output_taxon_string = animal_prediction_string
+        else:
+            output_taxon_string = speciesnet_latin_name_to_taxon_string[target_taxon]
+        input_category_id_to_output_taxon_string[input_category_id] = output_taxon_string
+
+    # ...for each category
+
+
+    ##%% Build the new tables
+
+    input_category_id_to_output_category_id = {}
+    output_taxon_string_to_category_id = {}
+    output_category_id_to_common_name = {}
+
+    for input_category_id in input_category_id_to_output_taxon_string:
+
+        original_common_name = \
+            input_category_id_to_common_name[input_category_id]
+        original_taxon_string = \
+            input_category_id_to_taxonomy_string[input_category_id]
+        output_taxon_string = \
+            input_category_id_to_output_taxon_string[input_category_id]
+
+        output_common_name = output_taxon_string.split(';')[-1]
+
+        # Do we need to create a new output category?
+        if output_taxon_string not in output_taxon_string_to_category_id:
+            output_category_id = str(len(output_taxon_string_to_category_id))
+            output_taxon_string_to_category_id[output_taxon_string] = \
+                output_category_id
+            output_category_id_to_common_name[output_category_id] = \
+                output_common_name
+        else:
+            output_category_id = \
+                output_taxon_string_to_category_id[output_taxon_string]
+
+        input_category_id_to_output_category_id[input_category_id] = \
+            output_category_id
+
+        if False:
+            print('Mapping {} ({}) to:\n{} ({})\n'.format(
+                original_common_name,original_taxon_string,
+                output_common_name,output_taxon_string))
+        if False:
+            print('Mapping {} to {}'.format(
+                original_common_name,output_common_name))
+
+    # ...for each category
+
+
+    ##%% Remap all category labels
+
+    assert len(set(output_taxon_string_to_category_id.keys())) == \
+        len(set(output_taxon_string_to_category_id.values()))
+
+    output_category_id_to_taxon_string = \
+        invert_dictionary(output_taxon_string_to_category_id)
+
+    with open(input_file,'r') as f:
+        output_data = json.load(f)
+
+    for im in tqdm(output_data['images']):
+        if 'detections' in im and im['detections'] is not None:
+            for det in im['detections']:
+                if 'classifications' in det:
+                    for classification in det['classifications']:
+                        classification[0] = \
+                            input_category_id_to_output_category_id[classification[0]]
+
+    output_data['classification_categories'] = output_category_id_to_common_name
+    output_data['classification_category_descriptions'] = \
+        output_category_id_to_taxon_string
+
+
+    ##%% Write output
+
+    with open(output_file,'w') as f:
+        json.dump(output_data,f,indent=1)
+
+
 #%% Interactive driver(s)
 
 if False:
 
     pass
 
+    #%% Shared cell to initialize geofencing and taxonomy information
+
+    from megadetector.utils.wi_utils import species_allowed_in_country # noqa
+    from megadetector.utils.wi_utils import initialize_geofencing, initialize_taxonomy_info # noqa
+    from megadetector.utils.wi_utils import _species_string_to_canonical_species_string # noqa
+    from megadetector.utils.wi_utils import generate_csv_rows_for_species # noqa
+    from megadetector.utils.wi_utils import _generate_csv_rows_to_block_all_countries_except # noqa
+
+    from megadetector.utils.wi_utils import taxonomy_string_to_geofencing_rules # noqa
+    from megadetector.utils.wi_utils import taxonomy_string_to_taxonomy_info # noqa
+
+    geofencing_file = r'c:\git\cameratrapai\data\geofence_base.json'
+
+    country_code_file = r'g:\temp\country-codes.csv'
+    # encoding = 'cp1252'; taxonomy_file = r'g:\temp\taxonomy_mapping-' + encoding + '.json'
+    encoding = None; taxonomy_file = r'g:\temp\taxonomy_mapping.json'
+
+    initialize_geofencing(geofencing_file, country_code_file, force_init=True)
+    initialize_taxonomy_info(taxonomy_file, force_init=True, encoding=encoding)
+
+
+    #%% Test driver for geofence_fixes.csv function
+
+    block_except_list = 'AUS, PNG, THA, IDN, MYS'
+    species = 'dingo'
+    species_string = _species_string_to_canonical_species_string(species)
+    rows = _generate_csv_rows_to_block_all_countries_except(species_string,block_except_list)
+
+    import clipboard; clipboard.copy('\n'.join(rows))
+
+
+    #%%
+
+    generate_csv_rows_for_species(species_string=species_string,
+                                  allow_countries=None,
+                                  block_countries=None,
+                                  allow_states=None,
+                                  block_states=None,
+                                  blockexcept_countries=None)
+
+
+    _generate_csv_rows_to_block_all_countries_except(species_string,'AUS')
+
+
+    #%% Test the effects of geofence changes
+
+    species = 'canis lupus dingo'
+    country = 'guatemala'
+    species_allowed_in_country(species,country,state=None,return_status=False)
+
+
     #%% instances.json generation test
 
     from megadetector.utils.wi_utils import generate_instances_json_from_folder # noqa
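For the new `restrict_to_taxa_list` function, a hypothetical driver follows; every path is invented, and the two-column taxa-list format follows the docstring. With allow_walk_down=True, a felid-family prediction would be mapped down to 'puma concolor' if that were the only allowed felid; with the default False, it stays at the family level.

```python
# Hypothetical driver; all filenames are invented, and the listed species
# must exist in the supplied SpeciesNet taxonomy file
from megadetector.utils.wi_utils import restrict_to_taxa_list

# One latin name per line, with an optional comma-delimited common name
with open('taxa_list.txt', 'w') as f:
    f.write('puma concolor,puma\n')
    f.write('odocoileus virginianus,white-tailed deer\n')

restrict_to_taxa_list(taxa_list='taxa_list.txt',
                      speciesnet_taxonomy_file='speciesnet_taxonomy.txt',
                      input_file='md_results.json',
                      output_file='md_results_restricted.json',
                      allow_walk_down=False)
```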
@@ -1604,13 +2515,6 @@ if False:
 
     #%% Geofencing tests
 
-    geofencing_file = r'g:\temp\geofence_mapping.json'
-    country_code_file = r'G:/temp/country-codes.csv'
-    encoding = 'cp1252'; taxonomy_file = r'g:\temp\taxonomy_mapping-' + encoding + '.json'
-
-    initialize_taxonomy_info(taxonomy_file, force_init=True, encoding=encoding)
-    initialize_geofencing(geofencing_file, country_code_file, force_init=True)
-
     species = 'didelphis marsupialis'
     print(binomial_name_to_taxonomy_info[species])
     country = 'Guatemala'
@@ -1624,13 +2528,6 @@ if False:
 
     #%% Test several species
 
-    geofencing_file = r'g:\temp\geofence_mapping.json'
-    country_code_file = r'G:/temp/country-codes.csv'
-    encoding = 'cp1252'; taxonomy_file = r'g:\temp\taxonomy_mapping-' + encoding + '.json'
-
-    initialize_taxonomy_info(taxonomy_file, force_init=True, encoding=encoding)
-    initialize_geofencing(geofencing_file, country_code_file, force_init=True)
-
     if True:
 
         # Make sure some Guatemalan species are allowed in Guatemala