megadetector 10.0.6__py3-none-any.whl → 10.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

@@ -13,14 +13,15 @@ Functions for postprocessing species classification results, particularly:
13
13
 
14
14
  #%% Constants and imports
15
15
 
16
- import os
17
16
  import json
18
17
  import copy
18
+ import pandas as pd
19
19
 
20
20
  from collections import defaultdict
21
21
  from tqdm import tqdm
22
22
 
23
23
  from megadetector.utils.ct_utils import is_list_sorted
24
+ from megadetector.utils.ct_utils import is_empty
24
25
  from megadetector.utils.ct_utils import sort_dictionary_by_value
25
26
  from megadetector.utils.ct_utils import sort_dictionary_by_key
26
27
  from megadetector.utils.ct_utils import invert_dictionary
@@ -29,9 +30,9 @@ from megadetector.utils.wi_taxonomy_utils import clean_taxonomy_string
29
30
  from megadetector.utils.wi_taxonomy_utils import taxonomy_level_index
30
31
  from megadetector.utils.wi_taxonomy_utils import taxonomy_level_string_to_index
31
32
 
32
- from megadetector.utils.wi_taxonomy_utils import non_taxonomic_prediction_strings
33
33
  from megadetector.utils.wi_taxonomy_utils import human_prediction_string
34
34
  from megadetector.utils.wi_taxonomy_utils import animal_prediction_string
35
+ from megadetector.utils.wi_taxonomy_utils import is_taxonomic_prediction_string
35
36
  from megadetector.utils.wi_taxonomy_utils import blank_prediction_string # noqa
36
37
 
37
38
 
@@ -129,7 +130,7 @@ class ClassificationSmoothingOptions:
129
130
 
130
131
  ## Populated internally
131
132
 
132
- #: #: Only include these categories in the smoothing process (None to use all categories)
133
+ #: Only include these categories in the smoothing process (None to use all categories)
133
134
  self._detection_category_ids_to_smooth = None
134
135
 
135
136
 
@@ -1014,6 +1015,10 @@ def smooth_classification_results_sequence_level(input_file,
1014
1015
 
1015
1016
  detections_this_sequence = []
1016
1017
  for image_filename in image_filenames_this_sequence:
1018
+ if image_filename not in image_fn_to_classification_results:
1019
+ print('Warning: {} in sequence list but not in results'.format(
1020
+ image_filename))
1021
+ continue
1017
1022
  im = image_fn_to_classification_results[image_filename]
1018
1023
  if 'detections' not in im or im['detections'] is None:
1019
1024
  continue
@@ -1101,16 +1106,16 @@ def restrict_to_taxa_list(taxa_list,
1101
1106
  output_file,
1102
1107
  allow_walk_down=False,
1103
1108
  add_pre_filtering_description=True,
1104
- allow_redundant_latin_names=False):
1109
+ allow_redundant_latin_names=True,
1110
+ protected_common_names=None,
1111
+ use_original_common_names_if_available=True,
1112
+ verbose=True):
1105
1113
  """
1106
1114
  Given a prediction file in MD .json format, likely without having had
1107
1115
  a geofence applied, apply a custom taxa list.
1108
1116
 
1109
1117
  Args:
1110
- taxa_list (str or list): list of latin names, or a text file containing
1111
- a list of latin names. Optionally may contain a second (comma-delimited)
1112
- column containing common names, used only for debugging. Latin names
1113
- must exist in the SpeciesNet taxonomy.
1118
+ taxa_list (str): .csv file with at least the columns "latin" and "common".
1114
1119
  speciesnet_taxonomy_file (str): taxonomy filename, in the same format used for
1115
1120
  model release (with 7-token taxonomy entries)
1116
1121
  input_file (str): .json file to read, in MD format. This can be None, in which
@@ -1128,45 +1133,73 @@ def restrict_to_taxa_list(taxa_list,
1128
1133
  if the same latin name appears twice in the taxonomy list; if True, we'll
1129
1134
  just print a warning and ignore all entries other than the first for this
1130
1135
  latin name
1136
+ protected_common_names (list, optional): these categories should be
1137
+ unmodified, even if they aren't used, or have the same taxonomic
1138
+ description as other categories
1139
+ use_original_common_names_if_available (bool, optional): if an "original_common"
1140
+ column is present in [taxa_list], use those common names instead of the ones
1141
+ in the taxonomy file
1142
+ verbose (bool, optional): enable additional debug output
1131
1143
  """
1132
1144
 
1133
1145
  ##%% Read target taxa list
1134
1146
 
1135
- if isinstance(taxa_list,str):
1136
- assert os.path.isfile(taxa_list), \
1137
- 'Could not find taxa list file {}'.format(taxa_list)
1138
- with open(taxa_list,'r') as f:
1139
- taxa_list = f.readlines()
1147
+ taxa_list_df = pd.read_csv(taxa_list)
1148
+
1149
+ required_columns = ('latin','common')
1150
+ for s in required_columns:
1151
+ assert s in taxa_list_df.columns, \
1152
+ 'Required column {} missing from taxonomy list file {}'.format(
1153
+ s,taxa_list)
1154
+
1155
+ # Convert the "latin" and "common" columns in taxa_list_df to lowercase
1156
+ taxa_list_df['latin'] = taxa_list_df['latin'].str.lower()
1157
+ taxa_list_df['common'] = taxa_list_df['common'].str.lower()
1140
1158
 
1141
- taxa_list = [s.strip().lower() for s in taxa_list]
1142
- taxa_list = [s for s in taxa_list if len(s) > 0]
1159
+ # Remove rows from taxa_list_df where the "latin" column is nan,
1160
+ # printing a warning for each row (with a string representation of the whole row)
1161
+ for i_row,row in taxa_list_df.iterrows():
1162
+ if pd.isna(row['latin']):
1163
+ if verbose:
1164
+ print('Warning: Skipping row with empty "latin" column in {}:\n{}\n'.format(
1165
+ taxa_list,str(row.to_dict())))
1166
+ taxa_list_df.drop(index=i_row, inplace=True)
1143
1167
 
1168
+ # Convert all NaN values in the "common" column to empty strings
1169
+ taxa_list_df['common'] = taxa_list_df['common'].fillna('')
1170
+
1171
+ # Create a dictionary mapping latin names to common names
1144
1172
  target_latin_to_common = {}
1145
1173
 
1146
- for s in taxa_list:
1174
+ for i_row,row in taxa_list_df.iterrows():
1147
1175
 
1148
- if s.strip().startswith('#'):
1149
- continue
1150
- tokens = s.split(',')
1151
- # We allow additional columns now
1152
- # assert len(tokens) <= 2
1153
- binomial_name = tokens[0]
1154
- assert len(binomial_name.split(' ')) in (1,2,3), \
1155
- 'Illegal binomial name in species list: {}'.format(binomial_name)
1156
- if len(tokens) > 0:
1157
- common_name = tokens[1].strip().lower()
1158
- else:
1159
- common_name = None
1160
- if binomial_name in target_latin_to_common:
1161
- error_string = 'scientific name {} appears multiple times in the taxonomy list'.format(
1162
- binomial_name)
1176
+ latin = row['latin']
1177
+ common = row['common']
1178
+
1179
+ if use_original_common_names_if_available and \
1180
+ ('original_common' in row) and \
1181
+ (not is_empty(row['original_common'])):
1182
+ common = row['original_common'].strip().lower()
1183
+
1184
+ # Valid latin names have either one token (e.g. "canidae"),
1185
+ # two tokens (e.g. "bos taurus"), or three tokens (e.g. "canis lupus familiaris")
1186
+ assert len(latin.split(' ')) in (1,2,3), \
1187
+ 'Illegal binomial name {} in taxaonomy list {}'.format(
1188
+ latin,taxa_list)
1189
+
1190
+ if latin in target_latin_to_common:
1191
+ error_string = \
1192
+ 'scientific name {} appears multiple times in the taxonomy list'.format(
1193
+ latin)
1163
1194
  if allow_redundant_latin_names:
1164
- print('Warning: {}'.format(error_string))
1195
+ if verbose:
1196
+ print('Warning: {}'.format(error_string))
1165
1197
  else:
1166
1198
  raise ValueError(error_string)
1167
- target_latin_to_common[binomial_name] = common_name
1168
1199
 
1169
- # ...for each line in the taxonomy file
1200
+ target_latin_to_common[latin] = common
1201
+
1202
+ # ...for each row in the custom taxonomy list
1170
1203
 
1171
1204
 
1172
1205
  ##%% Read taxonomy file
@@ -1185,7 +1218,7 @@ def restrict_to_taxa_list(taxa_list,
1185
1218
  def _insert_taxonomy_string(s):
1186
1219
 
1187
1220
  tokens = s.split(';')
1188
- assert len(tokens) == 7
1221
+ assert len(tokens) == 7, 'Illegal taxonomy string {}'.format(s)
1189
1222
 
1190
1223
  guid = tokens[0] # noqa
1191
1224
  class_name = tokens[1]
@@ -1196,20 +1229,24 @@ def restrict_to_taxa_list(taxa_list,
1196
1229
  common_name = tokens[6]
1197
1230
 
1198
1231
  if len(class_name) == 0:
1199
- assert common_name in ('animal','vehicle','blank')
1232
+ assert common_name in ('animal','vehicle','blank'), \
1233
+ 'Illegal common name {}'.format(common_name)
1200
1234
  return
1201
1235
 
1202
1236
  if len(species) > 0:
1203
- assert all([len(s) > 0 for s in [genus,family,order]])
1237
+ assert all([len(s) > 0 for s in [genus,family,order]]), \
1238
+ 'Higher-level taxa missing for {}: {},{},{}'.format(s,genus,family,order)
1204
1239
  binomial_name = genus + ' ' + species
1205
1240
  if binomial_name not in speciesnet_latin_name_to_taxon_string:
1206
1241
  speciesnet_latin_name_to_taxon_string[binomial_name] = s
1207
1242
  elif len(genus) > 0:
1208
- assert all([len(s) > 0 for s in [family,order]])
1243
+ assert all([len(s) > 0 for s in [family,order]]), \
1244
+ 'Higher-level taxa missing for {}: {},{}'.format(s,family,order)
1209
1245
  if genus not in speciesnet_latin_name_to_taxon_string:
1210
1246
  speciesnet_latin_name_to_taxon_string[genus] = s
1211
1247
  elif len(family) > 0:
1212
- assert len(order) > 0
1248
+ assert len(order) > 0, \
1249
+ 'Higher-level taxa missing for {}: {}'.format(s,order)
1213
1250
  if family not in speciesnet_latin_name_to_taxon_string:
1214
1251
  speciesnet_latin_name_to_taxon_string[family] = s
1215
1252
  elif len(order) > 0:
@@ -1232,12 +1269,19 @@ def restrict_to_taxa_list(taxa_list,
1232
1269
 
1233
1270
  # In theory any taxon that appears as the parent of another taxon should
1234
1271
  # also be in the taxonomy, but this isn't always true, so we fix it here.
1235
-
1236
1272
  new_taxon_string_to_missing_tokens = defaultdict(list)
1237
1273
 
1274
+ # While we're making this loop, also see whether we need to store any custom
1275
+ # common name mappings based on the taxonomy list.
1276
+ speciesnet_latin_name_to_output_common_name = {}
1277
+
1238
1278
  # latin_name = next(iter(speciesnet_latin_name_to_taxon_string.keys()))
1239
1279
  for latin_name in speciesnet_latin_name_to_taxon_string.keys():
1240
1280
 
1281
+ if latin_name in target_latin_to_common:
1282
+ speciesnet_latin_name_to_output_common_name[latin_name] = \
1283
+ target_latin_to_common[latin_name]
1284
+
1241
1285
  if 'no cv result' in latin_name:
1242
1286
  continue
1243
1287
 
@@ -1260,7 +1304,8 @@ def restrict_to_taxa_list(taxa_list,
1260
1304
  for i_copy_token in range(1,i_token+1):
1261
1305
  new_tokens[i_copy_token] = tokens[i_copy_token]
1262
1306
  new_tokens[-1] = test_token + ' species'
1263
- assert new_tokens[-2] == ''
1307
+ assert new_tokens[-2] == '', \
1308
+ 'Illegal taxonomy string {}'.format(taxon_string)
1264
1309
  new_taxon_string = ';'.join(new_tokens)
1265
1310
  # assert new_taxon_string not in new_taxon_strings
1266
1311
  new_taxon_string_to_missing_tokens[new_taxon_string].append(test_token)
@@ -1269,14 +1314,19 @@ def restrict_to_taxa_list(taxa_list,
1269
1314
 
1270
1315
  # ...for each taxon
1271
1316
 
1272
- print('Found {} taxa that need to be inserted to make the taxonomy valid:\n'.format(
1273
- len(new_taxon_string_to_missing_tokens)))
1274
-
1275
1317
  new_taxon_string_to_missing_tokens = \
1276
1318
  sort_dictionary_by_key(new_taxon_string_to_missing_tokens)
1277
- for taxon_string in new_taxon_string_to_missing_tokens:
1278
- missing_taxa = ','.join(new_taxon_string_to_missing_tokens[taxon_string])
1279
- print('{} ({})'.format(taxon_string,missing_taxa))
1319
+
1320
+ if verbose:
1321
+
1322
+ print(f'Found {len(new_taxon_string_to_missing_tokens)} taxa that need to be inserted to ' + \
1323
+ 'make the taxonomy valid, showing only mammals and birds here:\n')
1324
+
1325
+ for taxon_string in new_taxon_string_to_missing_tokens:
1326
+ if 'mammalia' not in taxon_string and 'aves' not in taxon_string:
1327
+ continue
1328
+ missing_taxa = ','.join(new_taxon_string_to_missing_tokens[taxon_string])
1329
+ print('{} ({})'.format(taxon_string,missing_taxa))
1280
1330
 
1281
1331
  for new_taxon_string in new_taxon_string_to_missing_tokens:
1282
1332
  _insert_taxonomy_string(new_taxon_string)
@@ -1298,7 +1348,7 @@ def restrict_to_taxa_list(taxa_list,
1298
1348
  n_failed_mappings += 1
1299
1349
 
1300
1350
  if n_failed_mappings > 0:
1301
- raise ValueError('Cannot continue with geofence generation')
1351
+ raise ValueError('Cannot continue with taxonomic restriction')
1302
1352
 
1303
1353
 
1304
1354
  ##%% For the allow-list, map each parent taxon to a set of allowable child taxa
@@ -1312,7 +1362,8 @@ def restrict_to_taxa_list(taxa_list,
1312
1362
 
1313
1363
  taxon_string = speciesnet_latin_name_to_taxon_string[latin_name]
1314
1364
  tokens = taxon_string.split(';')
1315
- assert len(tokens) == 7
1365
+ assert len(tokens) == 7, \
1366
+ 'Illegal taxonomy string {}'.format(taxon_string)
1316
1367
 
1317
1368
  # Remove GUID and common name
1318
1369
  #
@@ -1324,25 +1375,85 @@ def restrict_to_taxa_list(taxa_list,
1324
1375
  # If this is a species
1325
1376
  if len(tokens[-1]) > 0:
1326
1377
  binomial_name = tokens[-2] + ' ' + tokens[-1]
1327
- assert binomial_name == latin_name
1378
+ assert binomial_name == latin_name, \
1379
+ 'Binomial/latin mismatch: {} vs {}'.format(binomial_name,latin_name)
1380
+ # If this already exists, it should only allow "None"
1381
+ if binomial_name in allowed_parent_taxon_to_child_taxa:
1382
+ assert len(allowed_parent_taxon_to_child_taxa[binomial_name]) == 1, \
1383
+ 'Species-level entry {} has multiple children'.format(binomial_name)
1384
+ assert None in allowed_parent_taxon_to_child_taxa[binomial_name], \
1385
+ 'Species-level entry {} has non-None children'.format(binomial_name)
1328
1386
  allowed_parent_taxon_to_child_taxa[binomial_name].add(None)
1329
1387
  child_taxon = binomial_name
1330
1388
 
1331
- # The first candidate parent is the genus
1389
+ # The first level that can ever be a parent taxon is the genus level
1332
1390
  parent_token_index = len(tokens) - 2
1333
1391
 
1392
+ # Walk up from the genus level through every higher taxonomic level (to token index 0)
1334
1393
  while(parent_token_index >= 0):
1335
1394
 
1395
+ # "None" is our leaf node marker, we should never have ''
1396
+ if child_taxon is not None:
1397
+ assert len(child_taxon) > 0
1398
+
1336
1399
  parent_taxon = tokens[parent_token_index]
1337
- allowed_parent_taxon_to_child_taxa[parent_taxon].add(child_taxon)
1338
- child_taxon = parent_taxon
1400
+
1401
+ # Don't create entries for blank taxa
1402
+ if (len(parent_taxon) > 0):
1403
+
1404
+ create_child = True
1405
+
1406
+ # This is the lowest-level taxon in this entry
1407
+ if (child_taxon is None):
1408
+
1409
+ # ...but we don't want to remove existing children from any parents
1410
+ if (parent_taxon in allowed_parent_taxon_to_child_taxa) and \
1411
+ (len(allowed_parent_taxon_to_child_taxa[parent_taxon]) > 0):
1412
+ if verbose:
1413
+ existing_children_string = str(allowed_parent_taxon_to_child_taxa[parent_taxon])
1414
+ print('Not creating empty child for parent {} (already has children {})'.format(
1415
+ parent_taxon,existing_children_string))
1416
+ create_child = False
1417
+
1418
+ # If we're adding a new child entry, clear out any leaf node markers
1419
+ else:
1420
+
1421
+ if (parent_taxon in allowed_parent_taxon_to_child_taxa) and \
1422
+ (None in allowed_parent_taxon_to_child_taxa[parent_taxon]):
1423
+
1424
+ assert len(allowed_parent_taxon_to_child_taxa[parent_taxon]) == 1, \
1425
+ 'Illlegal parent/child configuration'
1426
+
1427
+ if verbose:
1428
+ print('Un-marking parent {} as a leaf node because of child {}'.format(
1429
+ parent_taxon,child_taxon))
1430
+
1431
+ allowed_parent_taxon_to_child_taxa[parent_taxon] = set()
1432
+
1433
+ if create_child:
1434
+ allowed_parent_taxon_to_child_taxa[parent_taxon].add(child_taxon)
1435
+
1436
+ # If we haven't hit a non-empty taxon yet, don't update "child_taxon"
1437
+ assert len(parent_taxon) > 0
1438
+ child_taxon = parent_taxon
1439
+
1440
+ # ...if we have a non-empty taxon
1441
+
1339
1442
  parent_token_index -= 1
1340
1443
 
1444
+ # ...for each taxonomic level
1445
+
1341
1446
  # ...for each allowed latin name
1342
1447
 
1343
1448
  allowed_parent_taxon_to_child_taxa = \
1344
1449
  sort_dictionary_by_key(allowed_parent_taxon_to_child_taxa)
1345
1450
 
1451
+ for parent_taxon in allowed_parent_taxon_to_child_taxa:
1452
+ # "None" should only ever appear alone; this marks a leaf node with no children
1453
+ if None in allowed_parent_taxon_to_child_taxa[parent_taxon]:
1454
+ assert len(allowed_parent_taxon_to_child_taxa[parent_taxon]) == 1, \
1455
+ '"None" should only appear alone in a child taxon list'
1456
+
1346
1457
 
1347
1458
  ##%% If we were just validating the custom taxa file, we're done
1348
1459
 
@@ -1369,11 +1480,25 @@ def restrict_to_taxa_list(taxa_list,
1369
1480
 
1370
1481
  input_taxon_string = input_category_id_to_taxonomy_string[input_category_id]
1371
1482
  input_taxon_tokens = input_taxon_string.split(';')
1372
- assert len(input_taxon_tokens) == 7
1483
+ assert len(input_taxon_tokens) == 7, \
1484
+ 'Illegal taxonomy string: {}'.format(input_taxon_string)
1373
1485
 
1374
- # Don't mess with blank/no-cv-result/animal/human
1375
- if (input_taxon_string in non_taxonomic_prediction_strings) or \
1486
+ # Don't mess with blank/no-cv-result/human (or "animal", which is really "unknown")
1487
+ if (not is_taxonomic_prediction_string(input_taxon_string)) or \
1376
1488
  (input_taxon_string == human_prediction_string):
1489
+ if verbose:
1490
+ print('Not messing with non-taxonomic category {}'.format(input_taxon_string))
1491
+ input_category_id_to_output_taxon_string[input_category_id] = \
1492
+ input_taxon_string
1493
+ continue
1494
+
1495
+ # Don't mess with protected categories
1496
+ common_name = input_taxon_tokens[-1]
1497
+
1498
+ if (protected_common_names is not None) and \
1499
+ (common_name in protected_common_names):
1500
+ if verbose:
1501
+ print('Not messing with protected category {}'.format(common_name))
1377
1502
  input_category_id_to_output_taxon_string[input_category_id] = \
1378
1503
  input_taxon_string
1379
1504
  continue
@@ -1403,19 +1528,23 @@ def restrict_to_taxa_list(taxa_list,
1403
1528
  test_index -= 1
1404
1529
  continue
1405
1530
 
1406
- assert test_taxon_name in speciesnet_latin_name_to_taxon_string
1531
+ assert test_taxon_name in speciesnet_latin_name_to_taxon_string, \
1532
+ '{} should be a substring of {}'.format(test_taxon_name,
1533
+ speciesnet_latin_name_to_taxon_string)
1407
1534
 
1408
1535
  # Is this taxon allowed according to the custom species list?
1409
1536
  if test_taxon_name in allowed_parent_taxon_to_child_taxa:
1410
1537
 
1411
1538
  allowed_child_taxa = allowed_parent_taxon_to_child_taxa[test_taxon_name]
1412
- assert allowed_child_taxa is not None
1539
+ assert allowed_child_taxa is not None, \
1540
+ 'allowed_child_taxa should not be None: {}'.format(test_taxon_name)
1413
1541
 
1414
1542
  # If this is the lowest-level allowable token or there is not a
1415
1543
  # unique child, don't walk any further, even if walking down
1416
1544
  # is enabled.
1417
- if (None in allowed_child_taxa):
1418
- assert len(allowed_child_taxa) == 1
1545
+ if None in allowed_child_taxa:
1546
+ assert len(allowed_child_taxa) == 1, \
1547
+ '"None" should not be listed as a child taxa with other child taxa'
1419
1548
 
1420
1549
  if (None in allowed_child_taxa) or (len(allowed_child_taxa) > 1):
1421
1550
  target_taxon = test_taxon_name
@@ -1427,8 +1556,12 @@ def restrict_to_taxa_list(taxa_list,
1427
1556
  while ((next(iter(allowed_child_taxa)) is not None) and \
1428
1557
  (len(allowed_child_taxa) == 1)):
1429
1558
  candidate_taxon = next(iter(allowed_child_taxa))
1430
- assert candidate_taxon in allowed_parent_taxon_to_child_taxa
1431
- assert candidate_taxon in speciesnet_latin_name_to_taxon_string
1559
+ assert candidate_taxon in allowed_parent_taxon_to_child_taxa, \
1560
+ '{} should be a subset of {}'.format(
1561
+ candidate_taxon,allowed_parent_taxon_to_child_taxa)
1562
+ assert candidate_taxon in speciesnet_latin_name_to_taxon_string, \
1563
+ '{} should be a subset of {}'.format(
1564
+ candidate_taxon,speciesnet_latin_name_to_taxon_string)
1432
1565
  allowed_child_taxa = \
1433
1566
  allowed_parent_taxon_to_child_taxa[candidate_taxon]
1434
1567
  target_taxon = candidate_taxon
@@ -1450,21 +1583,30 @@ def restrict_to_taxa_list(taxa_list,
1450
1583
 
1451
1584
  ##%% Build the new tables
1452
1585
 
1586
+ speciesnet_taxon_string_to_latin_name = invert_dictionary(speciesnet_latin_name_to_taxon_string)
1587
+
1453
1588
  input_category_id_to_output_category_id = {}
1454
1589
  output_taxon_string_to_category_id = {}
1455
1590
  output_category_id_to_common_name = {}
1456
1591
 
1457
1592
  for input_category_id in input_category_id_to_output_taxon_string:
1458
1593
 
1459
- original_common_name = \
1460
- input_category_id_to_common_name[input_category_id]
1461
- original_taxon_string = \
1462
- input_category_id_to_taxonomy_string[input_category_id]
1463
1594
  output_taxon_string = \
1464
1595
  input_category_id_to_output_taxon_string[input_category_id]
1465
1596
 
1466
1597
  output_common_name = output_taxon_string.split(';')[-1]
1467
1598
 
1599
+ # Possibly substitute a custom common name
1600
+ if output_taxon_string in speciesnet_taxon_string_to_latin_name:
1601
+
1602
+ speciesnet_latin_name = speciesnet_taxon_string_to_latin_name[output_taxon_string]
1603
+
1604
+ if speciesnet_latin_name in speciesnet_latin_name_to_output_common_name:
1605
+ custom_common_name = speciesnet_latin_name_to_output_common_name[speciesnet_latin_name]
1606
+ if custom_common_name != output_common_name:
1607
+ print('Substituting common name {} for {}'.format(custom_common_name,output_common_name))
1608
+ output_common_name = custom_common_name
1609
+
1468
1610
  # Do we need to create a new output category?
1469
1611
  if output_taxon_string not in output_taxon_string_to_category_id:
1470
1612
  output_category_id = str(len(output_taxon_string_to_category_id))
@@ -1479,21 +1621,28 @@ def restrict_to_taxa_list(taxa_list,
1479
1621
  input_category_id_to_output_category_id[input_category_id] = \
1480
1622
  output_category_id
1481
1623
 
1624
+ # Sometimes-useful debug printouts
1482
1625
  if False:
1626
+ original_common_name = \
1627
+ input_category_id_to_common_name[input_category_id]
1628
+
1629
+ original_taxon_string = \
1630
+ input_category_id_to_taxonomy_string[input_category_id]
1631
+
1483
1632
  print('Mapping {} ({}) to:\n{} ({})\n'.format(
1484
1633
  original_common_name,original_taxon_string,
1485
1634
  output_common_name,output_taxon_string))
1486
- if False:
1487
1635
  print('Mapping {} to {}'.format(
1488
1636
  original_common_name,output_common_name,))
1489
1637
 
1490
1638
  # ...for each category
1491
1639
 
1492
1640
 
1493
- ##%% Remap all category labels
1641
+ #%% Remap all category labels
1494
1642
 
1495
1643
  assert len(set(output_taxon_string_to_category_id.keys())) == \
1496
- len(set(output_taxon_string_to_category_id.values()))
1644
+ len(set(output_taxon_string_to_category_id.values())), \
1645
+ 'Category ID/value non-uniqueness error'
1497
1646
 
1498
1647
  output_category_id_to_taxon_string = \
1499
1648
  invert_dictionary(output_taxon_string_to_category_id)