megadetector 10.0.6__py3-none-any.whl → 10.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

@@ -13,14 +13,15 @@ Functions for postprocessing species classification results, particularly:
13
13
 
14
14
  #%% Constants and imports
15
15
 
16
- import os
17
16
  import json
18
17
  import copy
18
+ import pandas as pd
19
19
 
20
20
  from collections import defaultdict
21
21
  from tqdm import tqdm
22
22
 
23
23
  from megadetector.utils.ct_utils import is_list_sorted
24
+ from megadetector.utils.ct_utils import is_empty
24
25
  from megadetector.utils.ct_utils import sort_dictionary_by_value
25
26
  from megadetector.utils.ct_utils import sort_dictionary_by_key
26
27
  from megadetector.utils.ct_utils import invert_dictionary
@@ -29,9 +30,9 @@ from megadetector.utils.wi_taxonomy_utils import clean_taxonomy_string
29
30
  from megadetector.utils.wi_taxonomy_utils import taxonomy_level_index
30
31
  from megadetector.utils.wi_taxonomy_utils import taxonomy_level_string_to_index
31
32
 
32
- from megadetector.utils.wi_taxonomy_utils import non_taxonomic_prediction_strings
33
33
  from megadetector.utils.wi_taxonomy_utils import human_prediction_string
34
34
  from megadetector.utils.wi_taxonomy_utils import animal_prediction_string
35
+ from megadetector.utils.wi_taxonomy_utils import is_taxonomic_prediction_string
35
36
  from megadetector.utils.wi_taxonomy_utils import blank_prediction_string # noqa
36
37
 
37
38
 
@@ -1101,16 +1102,16 @@ def restrict_to_taxa_list(taxa_list,
1101
1102
  output_file,
1102
1103
  allow_walk_down=False,
1103
1104
  add_pre_filtering_description=True,
1104
- allow_redundant_latin_names=False):
1105
+ allow_redundant_latin_names=True,
1106
+ protected_common_names=None,
1107
+ use_original_common_names_if_available=True,
1108
+ verbose=True):
1105
1109
  """
1106
1110
  Given a prediction file in MD .json format, likely without having had
1107
1111
  a geofence applied, apply a custom taxa list.
1108
1112
 
1109
1113
  Args:
1110
- taxa_list (str or list): list of latin names, or a text file containing
1111
- a list of latin names. Optionally may contain a second (comma-delimited)
1112
- column containing common names, used only for debugging. Latin names
1113
- must exist in the SpeciesNet taxonomy.
1114
+ taxa_list (str): .csv file with at least the columns "latin" and "common".
1114
1115
  speciesnet_taxonomy_file (str): taxonomy filename, in the same format used for
1115
1116
  model release (with 7-token taxonomy entries)
1116
1117
  input_file (str): .json file to read, in MD format. This can be None, in which
@@ -1128,45 +1129,73 @@ def restrict_to_taxa_list(taxa_list,
1128
1129
  if the same latin name appears twice in the taxonomy list; if True, we'll
1129
1130
  just print a warning and ignore all entries other than the first for this
1130
1131
  latin name
1132
+ protected_common_names (list, optional): these categories should be
1133
+ unmodified, even if they aren't used, or have the same taxonomic
1134
+ description as other categories
1135
+ use_original_common_names_if_available (bool, optional): if an "original_common"
1136
+ column is present in [taxa_list], use those common names instead of the ones
1137
+ in the taxonomy file
1138
+ verbose (bool, optional): enable additional debug output
1131
1139
  """
1132
1140
 
1133
1141
  ##%% Read target taxa list
1134
1142
 
1135
- if isinstance(taxa_list,str):
1136
- assert os.path.isfile(taxa_list), \
1137
- 'Could not find taxa list file {}'.format(taxa_list)
1138
- with open(taxa_list,'r') as f:
1139
- taxa_list = f.readlines()
1143
+ taxa_list_df = pd.read_csv(taxa_list)
1140
1144
 
1141
- taxa_list = [s.strip().lower() for s in taxa_list]
1142
- taxa_list = [s for s in taxa_list if len(s) > 0]
1145
+ required_columns = ('latin','common')
1146
+ for s in required_columns:
1147
+ assert s in taxa_list_df.columns, \
1148
+ 'Required column {} missing from taxonomy list file {}'.format(
1149
+ s,taxa_list)
1143
1150
 
1151
+ # Convert the "latin" and "common" columns in taxa_list_df to lowercase
1152
+ taxa_list_df['latin'] = taxa_list_df['latin'].str.lower()
1153
+ taxa_list_df['common'] = taxa_list_df['common'].str.lower()
1154
+
1155
+ # Remove rows from taxa_list_df where the "latin" column is nan,
1156
+ # printing a warning for each row (with a string representation of the whole row)
1157
+ for i_row,row in taxa_list_df.iterrows():
1158
+ if pd.isna(row['latin']):
1159
+ if verbose:
1160
+ print('Warning: Skipping row with empty "latin" column in {}:\n{}\n'.format(
1161
+ taxa_list,str(row.to_dict())))
1162
+ taxa_list_df.drop(index=i_row, inplace=True)
1163
+
1164
+ # Convert all NaN values in the "common" column to empty strings
1165
+ taxa_list_df['common'] = taxa_list_df['common'].fillna('')
1166
+
1167
+ # Create a dictionary mapping latin names to common names
1144
1168
  target_latin_to_common = {}
1145
1169
 
1146
- for s in taxa_list:
1170
+ for i_row,row in taxa_list_df.iterrows():
1147
1171
 
1148
- if s.strip().startswith('#'):
1149
- continue
1150
- tokens = s.split(',')
1151
- # We allow additional columns now
1152
- # assert len(tokens) <= 2
1153
- binomial_name = tokens[0]
1154
- assert len(binomial_name.split(' ')) in (1,2,3), \
1155
- 'Illegal binomial name in species list: {}'.format(binomial_name)
1156
- if len(tokens) > 0:
1157
- common_name = tokens[1].strip().lower()
1158
- else:
1159
- common_name = None
1160
- if binomial_name in target_latin_to_common:
1161
- error_string = 'scientific name {} appears multiple times in the taxonomy list'.format(
1162
- binomial_name)
1172
+ latin = row['latin']
1173
+ common = row['common']
1174
+
1175
+ if use_original_common_names_if_available and \
1176
+ ('original_common' in row) and \
1177
+ (not is_empty(row['original_common'])):
1178
+ common = row['original_common'].strip().lower()
1179
+
1180
+ # Valid latin names have either one token (e.g. "canidae"),
1181
+ # two tokens (e.g. "bos taurus"), or three tokens (e.g. "canis lupus familiaris")
1182
+ assert len(latin.split(' ')) in (1,2,3), \
1183
+ 'Illegal binomial name {} in taxaonomy list {}'.format(
1184
+ latin,taxa_list)
1185
+
1186
+ if latin in target_latin_to_common:
1187
+ error_string = \
1188
+ 'scientific name {} appears multiple times in the taxonomy list'.format(
1189
+ latin)
1163
1190
  if allow_redundant_latin_names:
1164
- print('Warning: {}'.format(error_string))
1191
+ if verbose:
1192
+ print('Warning: {}'.format(error_string))
1165
1193
  else:
1166
1194
  raise ValueError(error_string)
1167
- target_latin_to_common[binomial_name] = common_name
1168
1195
 
1169
- # ...for each line in the taxonomy file
1196
+ target_latin_to_common[latin] = common
1197
+
1198
+ # ...for each row in the custom taxonomy list
1170
1199
 
1171
1200
 
1172
1201
  ##%% Read taxonomy file
@@ -1185,7 +1214,7 @@ def restrict_to_taxa_list(taxa_list,
1185
1214
  def _insert_taxonomy_string(s):
1186
1215
 
1187
1216
  tokens = s.split(';')
1188
- assert len(tokens) == 7
1217
+ assert len(tokens) == 7, 'Illegal taxonomy string {}'.format(s)
1189
1218
 
1190
1219
  guid = tokens[0] # noqa
1191
1220
  class_name = tokens[1]
@@ -1196,20 +1225,24 @@ def restrict_to_taxa_list(taxa_list,
1196
1225
  common_name = tokens[6]
1197
1226
 
1198
1227
  if len(class_name) == 0:
1199
- assert common_name in ('animal','vehicle','blank')
1228
+ assert common_name in ('animal','vehicle','blank'), \
1229
+ 'Illegal common name {}'.format(common_name)
1200
1230
  return
1201
1231
 
1202
1232
  if len(species) > 0:
1203
- assert all([len(s) > 0 for s in [genus,family,order]])
1233
+ assert all([len(s) > 0 for s in [genus,family,order]]), \
1234
+ 'Higher-level taxa missing for {}: {},{},{}'.format(s,genus,family,order)
1204
1235
  binomial_name = genus + ' ' + species
1205
1236
  if binomial_name not in speciesnet_latin_name_to_taxon_string:
1206
1237
  speciesnet_latin_name_to_taxon_string[binomial_name] = s
1207
1238
  elif len(genus) > 0:
1208
- assert all([len(s) > 0 for s in [family,order]])
1239
+ assert all([len(s) > 0 for s in [family,order]]), \
1240
+ 'Higher-level taxa missing for {}: {},{}'.format(s,family,order)
1209
1241
  if genus not in speciesnet_latin_name_to_taxon_string:
1210
1242
  speciesnet_latin_name_to_taxon_string[genus] = s
1211
1243
  elif len(family) > 0:
1212
- assert len(order) > 0
1244
+ assert len(order) > 0, \
1245
+ 'Higher-level taxa missing for {}: {}'.format(s,order)
1213
1246
  if family not in speciesnet_latin_name_to_taxon_string:
1214
1247
  speciesnet_latin_name_to_taxon_string[family] = s
1215
1248
  elif len(order) > 0:
@@ -1232,12 +1265,19 @@ def restrict_to_taxa_list(taxa_list,
1232
1265
 
1233
1266
  # In theory any taxon that appears as the parent of another taxon should
1234
1267
  # also be in the taxonomy, but this isn't always true, so we fix it here.
1235
-
1236
1268
  new_taxon_string_to_missing_tokens = defaultdict(list)
1237
1269
 
1270
+ # While we're making this loop, also see whether we need to store any custom
1271
+ # common name mappings based on the taxonomy list.
1272
+ speciesnet_latin_name_to_output_common_name = {}
1273
+
1238
1274
  # latin_name = next(iter(speciesnet_latin_name_to_taxon_string.keys()))
1239
1275
  for latin_name in speciesnet_latin_name_to_taxon_string.keys():
1240
1276
 
1277
+ if latin_name in target_latin_to_common:
1278
+ speciesnet_latin_name_to_output_common_name[latin_name] = \
1279
+ target_latin_to_common[latin_name]
1280
+
1241
1281
  if 'no cv result' in latin_name:
1242
1282
  continue
1243
1283
 
@@ -1260,7 +1300,8 @@ def restrict_to_taxa_list(taxa_list,
1260
1300
  for i_copy_token in range(1,i_token+1):
1261
1301
  new_tokens[i_copy_token] = tokens[i_copy_token]
1262
1302
  new_tokens[-1] = test_token + ' species'
1263
- assert new_tokens[-2] == ''
1303
+ assert new_tokens[-2] == '', \
1304
+ 'Illegal taxonomy string {}'.format(taxon_string)
1264
1305
  new_taxon_string = ';'.join(new_tokens)
1265
1306
  # assert new_taxon_string not in new_taxon_strings
1266
1307
  new_taxon_string_to_missing_tokens[new_taxon_string].append(test_token)
@@ -1269,14 +1310,19 @@ def restrict_to_taxa_list(taxa_list,
1269
1310
 
1270
1311
  # ...for each taxon
1271
1312
 
1272
- print('Found {} taxa that need to be inserted to make the taxonomy valid:\n'.format(
1273
- len(new_taxon_string_to_missing_tokens)))
1274
-
1275
1313
  new_taxon_string_to_missing_tokens = \
1276
1314
  sort_dictionary_by_key(new_taxon_string_to_missing_tokens)
1277
- for taxon_string in new_taxon_string_to_missing_tokens:
1278
- missing_taxa = ','.join(new_taxon_string_to_missing_tokens[taxon_string])
1279
- print('{} ({})'.format(taxon_string,missing_taxa))
1315
+
1316
+ if verbose:
1317
+
1318
+ print(f'Found {len(new_taxon_string_to_missing_tokens)} taxa that need to be inserted to ' + \
1319
+ 'make the taxonomy valid, showing only mammals and birds here:\n')
1320
+
1321
+ for taxon_string in new_taxon_string_to_missing_tokens:
1322
+ if 'mammalia' not in taxon_string and 'aves' not in taxon_string:
1323
+ continue
1324
+ missing_taxa = ','.join(new_taxon_string_to_missing_tokens[taxon_string])
1325
+ print('{} ({})'.format(taxon_string,missing_taxa))
1280
1326
 
1281
1327
  for new_taxon_string in new_taxon_string_to_missing_tokens:
1282
1328
  _insert_taxonomy_string(new_taxon_string)
@@ -1298,7 +1344,7 @@ def restrict_to_taxa_list(taxa_list,
1298
1344
  n_failed_mappings += 1
1299
1345
 
1300
1346
  if n_failed_mappings > 0:
1301
- raise ValueError('Cannot continue with geofence generation')
1347
+ raise ValueError('Cannot continue with taxonomic restriction')
1302
1348
 
1303
1349
 
1304
1350
  ##%% For the allow-list, map each parent taxon to a set of allowable child taxa
@@ -1312,7 +1358,8 @@ def restrict_to_taxa_list(taxa_list,
1312
1358
 
1313
1359
  taxon_string = speciesnet_latin_name_to_taxon_string[latin_name]
1314
1360
  tokens = taxon_string.split(';')
1315
- assert len(tokens) == 7
1361
+ assert len(tokens) == 7, \
1362
+ 'Illegal taxonomy string {}'.format(taxon_string)
1316
1363
 
1317
1364
  # Remove GUID and common mame
1318
1365
  #
@@ -1324,25 +1371,85 @@ def restrict_to_taxa_list(taxa_list,
1324
1371
  # If this is a species
1325
1372
  if len(tokens[-1]) > 0:
1326
1373
  binomial_name = tokens[-2] + ' ' + tokens[-1]
1327
- assert binomial_name == latin_name
1374
+ assert binomial_name == latin_name, \
1375
+ 'Binomial/latin mismatch: {} vs {}'.format(binomial_name,latin_name)
1376
+ # If this already exists, it should only allow "None"
1377
+ if binomial_name in allowed_parent_taxon_to_child_taxa:
1378
+ assert len(allowed_parent_taxon_to_child_taxa[binomial_name]) == 1, \
1379
+ 'Species-level entry {} has multiple children'.format(binomial_name)
1380
+ assert None in allowed_parent_taxon_to_child_taxa[binomial_name], \
1381
+ 'Species-level entry {} has non-None children'.format(binomial_name)
1328
1382
  allowed_parent_taxon_to_child_taxa[binomial_name].add(None)
1329
1383
  child_taxon = binomial_name
1330
1384
 
1331
- # The first candidate parent is the genus
1385
+ # The first level that can ever be a parent taxon is the genus level
1332
1386
  parent_token_index = len(tokens) - 2
1333
1387
 
1388
+ # Walk up from genus to family
1334
1389
  while(parent_token_index >= 0):
1335
1390
 
1391
+ # "None" is our leaf node marker, we should never have ''
1392
+ if child_taxon is not None:
1393
+ assert len(child_taxon) > 0
1394
+
1336
1395
  parent_taxon = tokens[parent_token_index]
1337
- allowed_parent_taxon_to_child_taxa[parent_taxon].add(child_taxon)
1338
- child_taxon = parent_taxon
1396
+
1397
+ # Don't create entries for blank taxa
1398
+ if (len(parent_taxon) > 0):
1399
+
1400
+ create_child = True
1401
+
1402
+ # This is the lowest-level taxon in this entry
1403
+ if (child_taxon is None):
1404
+
1405
+ # ...but we don't want to remove existing children from any parents
1406
+ if (parent_taxon in allowed_parent_taxon_to_child_taxa) and \
1407
+ (len(allowed_parent_taxon_to_child_taxa[parent_taxon]) > 0):
1408
+ if verbose:
1409
+ existing_children_string = str(allowed_parent_taxon_to_child_taxa[parent_taxon])
1410
+ print('Not creating empty child for parent {} (already has children {})'.format(
1411
+ parent_taxon,existing_children_string))
1412
+ create_child = False
1413
+
1414
+ # If we're adding a new child entry, clear out any leaf node markers
1415
+ else:
1416
+
1417
+ if (parent_taxon in allowed_parent_taxon_to_child_taxa) and \
1418
+ (None in allowed_parent_taxon_to_child_taxa[parent_taxon]):
1419
+
1420
+ assert len(allowed_parent_taxon_to_child_taxa[parent_taxon]) == 1, \
1421
+ 'Illlegal parent/child configuration'
1422
+
1423
+ if verbose:
1424
+ print('Un-marking parent {} as a leaf node because of child {}'.format(
1425
+ parent_taxon,child_taxon))
1426
+
1427
+ allowed_parent_taxon_to_child_taxa[parent_taxon] = set()
1428
+
1429
+ if create_child:
1430
+ allowed_parent_taxon_to_child_taxa[parent_taxon].add(child_taxon)
1431
+
1432
+ # If we haven't hit a non-empty taxon yet, don't update "child_taxon"
1433
+ assert len(parent_taxon) > 0
1434
+ child_taxon = parent_taxon
1435
+
1436
+ # ...if we have a non-empty taxon
1437
+
1339
1438
  parent_token_index -= 1
1340
1439
 
1440
+ # ...for each taxonomic level
1441
+
1341
1442
  # ...for each allowed latin name
1342
1443
 
1343
1444
  allowed_parent_taxon_to_child_taxa = \
1344
1445
  sort_dictionary_by_key(allowed_parent_taxon_to_child_taxa)
1345
1446
 
1447
+ for parent_taxon in allowed_parent_taxon_to_child_taxa:
1448
+ # "None" should only ever appear alone; this marks a leaf node with no children
1449
+ if None in allowed_parent_taxon_to_child_taxa[parent_taxon]:
1450
+ assert len(allowed_parent_taxon_to_child_taxa[parent_taxon]) == 1, \
1451
+ '"None" should only appear alone in a child taxon list'
1452
+
1346
1453
 
1347
1454
  ##%% If we were just validating the custom taxa file, we're done
1348
1455
 
@@ -1369,11 +1476,25 @@ def restrict_to_taxa_list(taxa_list,
1369
1476
 
1370
1477
  input_taxon_string = input_category_id_to_taxonomy_string[input_category_id]
1371
1478
  input_taxon_tokens = input_taxon_string.split(';')
1372
- assert len(input_taxon_tokens) == 7
1479
+ assert len(input_taxon_tokens) == 7, \
1480
+ 'Illegal taxonomy string: {}'.format(input_taxon_string)
1373
1481
 
1374
- # Don't mess with blank/no-cv-result/animal/human
1375
- if (input_taxon_string in non_taxonomic_prediction_strings) or \
1482
+ # Don't mess with blank/no-cv-result/human (or "animal", which is really "unknown")
1483
+ if (not is_taxonomic_prediction_string(input_taxon_string)) or \
1376
1484
  (input_taxon_string == human_prediction_string):
1485
+ if verbose:
1486
+ print('Not messing with non-taxonomic category {}'.format(input_taxon_string))
1487
+ input_category_id_to_output_taxon_string[input_category_id] = \
1488
+ input_taxon_string
1489
+ continue
1490
+
1491
+ # Don't mess with protected categories
1492
+ common_name = input_taxon_tokens[-1]
1493
+
1494
+ if (protected_common_names is not None) and \
1495
+ (common_name in protected_common_names):
1496
+ if verbose:
1497
+ print('Not messing with protected category {}'.format(common_name))
1377
1498
  input_category_id_to_output_taxon_string[input_category_id] = \
1378
1499
  input_taxon_string
1379
1500
  continue
@@ -1403,19 +1524,23 @@ def restrict_to_taxa_list(taxa_list,
1403
1524
  test_index -= 1
1404
1525
  continue
1405
1526
 
1406
- assert test_taxon_name in speciesnet_latin_name_to_taxon_string
1527
+ assert test_taxon_name in speciesnet_latin_name_to_taxon_string, \
1528
+ '{} should be a substring of {}'.format(test_taxon_name,
1529
+ speciesnet_latin_name_to_taxon_string)
1407
1530
 
1408
1531
  # Is this taxon allowed according to the custom species list?
1409
1532
  if test_taxon_name in allowed_parent_taxon_to_child_taxa:
1410
1533
 
1411
1534
  allowed_child_taxa = allowed_parent_taxon_to_child_taxa[test_taxon_name]
1412
- assert allowed_child_taxa is not None
1535
+ assert allowed_child_taxa is not None, \
1536
+ 'allowed_child_taxa should not be None: {}'.format(test_taxon_name)
1413
1537
 
1414
1538
  # If this is the lowest-level allowable token or there is not a
1415
1539
  # unique child, don't walk any further, even if walking down
1416
1540
  # is enabled.
1417
- if (None in allowed_child_taxa):
1418
- assert len(allowed_child_taxa) == 1
1541
+ if None in allowed_child_taxa:
1542
+ assert len(allowed_child_taxa) == 1, \
1543
+ '"None" should not be listed as a child taxa with other child taxa'
1419
1544
 
1420
1545
  if (None in allowed_child_taxa) or (len(allowed_child_taxa) > 1):
1421
1546
  target_taxon = test_taxon_name
@@ -1427,8 +1552,12 @@ def restrict_to_taxa_list(taxa_list,
1427
1552
  while ((next(iter(allowed_child_taxa)) is not None) and \
1428
1553
  (len(allowed_child_taxa) == 1)):
1429
1554
  candidate_taxon = next(iter(allowed_child_taxa))
1430
- assert candidate_taxon in allowed_parent_taxon_to_child_taxa
1431
- assert candidate_taxon in speciesnet_latin_name_to_taxon_string
1555
+ assert candidate_taxon in allowed_parent_taxon_to_child_taxa, \
1556
+ '{} should be a subset of {}'.format(
1557
+ candidate_taxon,allowed_parent_taxon_to_child_taxa)
1558
+ assert candidate_taxon in speciesnet_latin_name_to_taxon_string, \
1559
+ '{} should be a subset of {}'.format(
1560
+ candidate_taxon,speciesnet_latin_name_to_taxon_string)
1432
1561
  allowed_child_taxa = \
1433
1562
  allowed_parent_taxon_to_child_taxa[candidate_taxon]
1434
1563
  target_taxon = candidate_taxon
@@ -1450,21 +1579,30 @@ def restrict_to_taxa_list(taxa_list,
1450
1579
 
1451
1580
  ##%% Build the new tables
1452
1581
 
1582
+ speciesnet_taxon_string_to_latin_name = invert_dictionary(speciesnet_latin_name_to_taxon_string)
1583
+
1453
1584
  input_category_id_to_output_category_id = {}
1454
1585
  output_taxon_string_to_category_id = {}
1455
1586
  output_category_id_to_common_name = {}
1456
1587
 
1457
1588
  for input_category_id in input_category_id_to_output_taxon_string:
1458
1589
 
1459
- original_common_name = \
1460
- input_category_id_to_common_name[input_category_id]
1461
- original_taxon_string = \
1462
- input_category_id_to_taxonomy_string[input_category_id]
1463
1590
  output_taxon_string = \
1464
1591
  input_category_id_to_output_taxon_string[input_category_id]
1465
1592
 
1466
1593
  output_common_name = output_taxon_string.split(';')[-1]
1467
1594
 
1595
+ # Possibly substitute a custom common name
1596
+ if output_taxon_string in speciesnet_taxon_string_to_latin_name:
1597
+
1598
+ speciesnet_latin_name = speciesnet_taxon_string_to_latin_name[output_taxon_string]
1599
+
1600
+ if speciesnet_latin_name in speciesnet_latin_name_to_output_common_name:
1601
+ custom_common_name = speciesnet_latin_name_to_output_common_name[speciesnet_latin_name]
1602
+ if custom_common_name != output_common_name:
1603
+ print('Substituting common name {} for {}'.format(custom_common_name,output_common_name))
1604
+ output_common_name = custom_common_name
1605
+
1468
1606
  # Do we need to create a new output category?
1469
1607
  if output_taxon_string not in output_taxon_string_to_category_id:
1470
1608
  output_category_id = str(len(output_taxon_string_to_category_id))
@@ -1479,21 +1617,28 @@ def restrict_to_taxa_list(taxa_list,
1479
1617
  input_category_id_to_output_category_id[input_category_id] = \
1480
1618
  output_category_id
1481
1619
 
1620
+ # Sometimes-useful debug printouts
1482
1621
  if False:
1622
+ original_common_name = \
1623
+ input_category_id_to_common_name[input_category_id]
1624
+
1625
+ original_taxon_string = \
1626
+ input_category_id_to_taxonomy_string[input_category_id]
1627
+
1483
1628
  print('Mapping {} ({}) to:\n{} ({})\n'.format(
1484
1629
  original_common_name,original_taxon_string,
1485
1630
  output_common_name,output_taxon_string))
1486
- if False:
1487
1631
  print('Mapping {} to {}'.format(
1488
1632
  original_common_name,output_common_name,))
1489
1633
 
1490
1634
  # ...for each category
1491
1635
 
1492
1636
 
1493
- ##%% Remap all category labels
1637
+ #%% Remap all category labels
1494
1638
 
1495
1639
  assert len(set(output_taxon_string_to_category_id.keys())) == \
1496
- len(set(output_taxon_string_to_category_id.values()))
1640
+ len(set(output_taxon_string_to_category_id.values())), \
1641
+ 'Category ID/value non-uniqueness error'
1497
1642
 
1498
1643
  output_category_id_to_taxon_string = \
1499
1644
  invert_dictionary(output_taxon_string_to_category_id)
@@ -180,8 +180,10 @@ def taxonomy_level_index(s):
180
180
  if len(tokens) == 7:
181
181
  tokens = tokens[1:-1]
182
182
 
183
+ # Anything without a class is considered non-taxonomic
183
184
  if len(tokens[0]) == 0:
184
185
  return 0
186
+
185
187
  # WI taxonomy strings start at class, so we'll never return 1 (kingdom) or 2 (phylum)
186
188
  elif len(tokens[1]) == 0:
187
189
  return 3
@@ -198,6 +200,22 @@ def taxonomy_level_index(s):
198
200
  return 8
199
201
 
200
202
 
203
+ def is_taxonomic_prediction_string(s):
204
+ """
205
+ Determines whether [s] is a classification string that has taxonomic properties; this
206
+ does not include, e.g., blanks/vehicles/no cv result. It also excludes "animal".
207
+
208
+ Args:
209
+ s (str): a five- or seven-token taxonomic string
210
+
211
+ Returns:
212
+ bool: whether [s] is a taxonomic category
213
+ """
214
+
215
+ return (taxonomy_level_index(s) > 0)
216
+
217
+
218
+
201
219
  def get_kingdom(prediction_string):
202
220
  """
203
221
  Return the kingdom field from a WI prediction string
@@ -1,134 +1,134 @@
1
- Metadata-Version: 2.4
2
- Name: megadetector
3
- Version: 10.0.6
4
- Summary: MegaDetector is an AI model that helps conservation folks spend less time doing boring things with camera trap images.
5
- Author-email: Your friendly neighborhood MegaDetector team <cameratraps@lila.science>
6
- Maintainer-email: Your friendly neighborhood MegaDetector team <cameratraps@lila.science>
7
- License: MIT License
8
-
9
- Permission is hereby granted, free of charge, to any person obtaining a copy
10
- of this software and associated documentation files (the "Software"), to deal
11
- in the Software without restriction, including without limitation the rights
12
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13
- copies of the Software, and to permit persons to whom the Software is
14
- furnished to do so, subject to the following conditions:
15
-
16
- The above copyright notice and this permission notice shall be included in all
17
- copies or substantial portions of the Software.
18
-
19
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25
- SOFTWARE.
26
-
27
- Project-URL: Homepage, https://github.com/agentmorris/MegaDetector
28
- Project-URL: Documentation, https://megadetector.readthedocs.io
29
- Project-URL: Bug Reports, https://github.com/agentmorris/MegaDetector/issues
30
- Project-URL: Source, https://github.com/agentmorris/MegaDetector
31
- Keywords: camera traps,conservation,wildlife,ai,megadetector
32
- Classifier: Programming Language :: Python :: 3
33
- Requires-Python: <3.14,>=3.9
34
- Description-Content-Type: text/markdown
35
- License-File: LICENSE
36
- Requires-Dist: mkl==2024.0; sys_platform != "darwin"
37
- Requires-Dist: numpy>=1.26.4
38
- Requires-Dist: Pillow>=9.5
39
- Requires-Dist: tqdm>=4.64.0
40
- Requires-Dist: jsonpickle>=3.0.2
41
- Requires-Dist: humanfriendly>=2.1
42
- Requires-Dist: matplotlib>=3.8.0
43
- Requires-Dist: opencv-python>=4.8.0
44
- Requires-Dist: requests>=2.31.0
45
- Requires-Dist: pyqtree>=1.0.0
46
- Requires-Dist: scikit-learn>=1.3.1
47
- Requires-Dist: pandas>=2.1.1
48
- Requires-Dist: python-dateutil
49
- Requires-Dist: send2trash
50
- Requires-Dist: clipboard
51
- Requires-Dist: dill
52
- Requires-Dist: ruff
53
- Requires-Dist: pytest
54
- Requires-Dist: ultralytics-yolov5==0.1.1
55
- Requires-Dist: yolov9pip==0.0.4
56
- Dynamic: license-file
57
-
58
- # MegaDetector
59
-
60
- This package is a pip-installable version of the support/inference code for [MegaDetector](https://github.com/agentmorris/MegaDetector/?tab=readme-ov-file#megadetector), an object detection model that helps conservation biologists spend less time doing boring things with camera trap images. Complete documentation for this Python package is available at [megadetector.readthedocs.io](https://megadetector.readthedocs.io).
61
-
62
- If you aren't looking for the Python package specifically, and you just want to learn more about what MegaDetector is all about, head over to the [MegaDetector repo](https://github.com/agentmorris/MegaDetector/?tab=readme-ov-file#megadetector).
63
-
64
- If you don't want to run MegaDetector, and you just want to use the utilities in this package - postprocessing, manipulating large volumes of camera trap images, etc. - you may want to check out the [megadetector-utils](https://pypi.org/project/megadetector-utils/) package, which is identical to this one, but excludes all of the PyTorch/YOLO dependencies, and is thus approximately one zillion times smaller.
65
-
66
- ## Installation
67
-
68
- Install with:
69
-
70
- `pip install megadetector`
71
-
72
- MegaDetector model weights aren't downloaded at the time you install the package, but they will be (optionally) automatically downloaded the first time you run the model.
73
-
74
- ## Package reference
75
-
76
- See [megadetector.readthedocs.io](https://megadetector.readthedocs.io).
77
-
78
-
79
- ## Examples of things you can do with this package
80
-
81
- ### Run MegaDetector on one image and count the number of detections
82
-
83
- ```
84
- from megadetector.utils import url_utils
85
- from megadetector.visualization import visualization_utils as vis_utils
86
- from megadetector.detection import run_detector
87
-
88
- # This is the image at the bottom of this page, it has one animal in it
89
- image_url = 'https://github.com/agentmorris/MegaDetector/raw/main/images/orinoquia-thumb-web.jpg'
90
- temporary_filename = url_utils.download_url(image_url)
91
-
92
- image = vis_utils.load_image(temporary_filename)
93
-
94
- # This will automatically download MDv5a; you can also specify a filename.
95
- model = run_detector.load_detector('MDV5A')
96
-
97
- result = model.generate_detections_one_image(image)
98
-
99
- detections_above_threshold = [d for d in result['detections'] if d['conf'] > 0.2]
100
- print('Found {} detections above threshold'.format(len(detections_above_threshold)))
101
- ```
102
-
103
- ### Run MegaDetector on a folder of images
104
-
105
- ```
106
- from megadetector.detection.run_detector_batch import \
107
- load_and_run_detector_batch, write_results_to_file
108
- from megadetector.utils import path_utils
109
- import os
110
-
111
- # Pick a folder to run MD on recursively, and an output file
112
- image_folder = os.path.expanduser('~/megadetector_test_images')
113
- output_file = os.path.expanduser('~/megadetector_output_test.json')
114
-
115
- # Recursively find images
116
- image_file_names = path_utils.find_images(image_folder,recursive=True)
117
-
118
- # This will automatically download MDv5a; you can also specify a filename.
119
- results = load_and_run_detector_batch('MDV5A', image_file_names)
120
-
121
- # Write results to a format that Timelapse and other downstream tools like.
122
- write_results_to_file(results,
123
- output_file,
124
- relative_path_base=image_folder,
125
- detector_file=detector_filename)
126
- ```
127
-
128
- ## Contact
129
-
130
- Contact <a href="cameratraps@lila.science">cameratraps@lila.science</a> with questions.
131
-
132
- ## Gratuitous animal picture
133
-
134
- <img src="https://github.com/agentmorris/MegaDetector/raw/main/images/orinoquia-thumb-web_detections.jpg"><br/>Image credit University of Minnesota, from the [Orinoquía Camera Traps](http://lila.science/datasets/orinoquia-camera-traps/) data set.
1
+ Metadata-Version: 2.4
2
+ Name: megadetector
3
+ Version: 10.0.7
4
+ Summary: MegaDetector is an AI model that helps conservation folks spend less time doing boring things with camera trap images.
5
+ Author-email: Your friendly neighborhood MegaDetector team <cameratraps@lila.science>
6
+ Maintainer-email: Your friendly neighborhood MegaDetector team <cameratraps@lila.science>
7
+ License: MIT License
8
+
9
+ Permission is hereby granted, free of charge, to any person obtaining a copy
10
+ of this software and associated documentation files (the "Software"), to deal
11
+ in the Software without restriction, including without limitation the rights
12
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13
+ copies of the Software, and to permit persons to whom the Software is
14
+ furnished to do so, subject to the following conditions:
15
+
16
+ The above copyright notice and this permission notice shall be included in all
17
+ copies or substantial portions of the Software.
18
+
19
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25
+ SOFTWARE.
26
+
27
+ Project-URL: Homepage, https://github.com/agentmorris/MegaDetector
28
+ Project-URL: Documentation, https://megadetector.readthedocs.io
29
+ Project-URL: Bug Reports, https://github.com/agentmorris/MegaDetector/issues
30
+ Project-URL: Source, https://github.com/agentmorris/MegaDetector
31
+ Keywords: camera traps,conservation,wildlife,ai,megadetector
32
+ Classifier: Programming Language :: Python :: 3
33
+ Requires-Python: <3.14,>=3.9
34
+ Description-Content-Type: text/markdown
35
+ License-File: LICENSE
36
+ Requires-Dist: mkl==2024.0; sys_platform != "darwin"
37
+ Requires-Dist: numpy>=1.26.4
38
+ Requires-Dist: Pillow>=9.5
39
+ Requires-Dist: tqdm>=4.64.0
40
+ Requires-Dist: jsonpickle>=3.0.2
41
+ Requires-Dist: humanfriendly>=2.1
42
+ Requires-Dist: matplotlib>=3.8.0
43
+ Requires-Dist: opencv-python>=4.8.0
44
+ Requires-Dist: requests>=2.31.0
45
+ Requires-Dist: pyqtree>=1.0.0
46
+ Requires-Dist: scikit-learn>=1.3.1
47
+ Requires-Dist: pandas>=2.1.1
48
+ Requires-Dist: python-dateutil
49
+ Requires-Dist: send2trash
50
+ Requires-Dist: clipboard
51
+ Requires-Dist: dill
52
+ Requires-Dist: ruff
53
+ Requires-Dist: pytest
54
+ Requires-Dist: ultralytics-yolov5==0.1.1
55
+ Requires-Dist: yolov9pip==0.0.4
56
+ Dynamic: license-file
57
+
58
+ # MegaDetector
59
+
60
+ This package is a pip-installable version of the support/inference code for [MegaDetector](https://github.com/agentmorris/MegaDetector/?tab=readme-ov-file#megadetector), an object detection model that helps conservation biologists spend less time doing boring things with camera trap images. Complete documentation for this Python package is available at [megadetector.readthedocs.io](https://megadetector.readthedocs.io).
61
+
62
+ If you aren't looking for the Python package specifically, and you just want to learn more about what MegaDetector is all about, head over to the [MegaDetector repo](https://github.com/agentmorris/MegaDetector/?tab=readme-ov-file#megadetector).
63
+
64
+ If you don't want to run MegaDetector, and you just want to use the utilities in this package - postprocessing, manipulating large volumes of camera trap images, etc. - you may want to check out the [megadetector-utils](https://pypi.org/project/megadetector-utils/) package, which is identical to this one, but excludes all of the PyTorch/YOLO dependencies, and is thus approximately one zillion times smaller.
65
+
66
+ ## Installation
67
+
68
+ Install with:
69
+
70
+ `pip install megadetector`
71
+
72
+ MegaDetector model weights aren't downloaded at the time you install the package, but they will be (optionally) automatically downloaded the first time you run the model.
73
+
74
+ ## Package reference
75
+
76
+ See [megadetector.readthedocs.io](https://megadetector.readthedocs.io).
77
+
78
+
79
+ ## Examples of things you can do with this package
80
+
81
+ ### Run MegaDetector on one image and count the number of detections
82
+
83
+ ```
84
+ from megadetector.utils import url_utils
85
+ from megadetector.visualization import visualization_utils as vis_utils
86
+ from megadetector.detection import run_detector
87
+
88
+ # This is the image at the bottom of this page, it has one animal in it
89
+ image_url = 'https://github.com/agentmorris/MegaDetector/raw/main/images/orinoquia-thumb-web.jpg'
90
+ temporary_filename = url_utils.download_url(image_url)
91
+
92
+ image = vis_utils.load_image(temporary_filename)
93
+
94
+ # This will automatically download MDv5a; you can also specify a filename.
95
+ model = run_detector.load_detector('MDV5A')
96
+
97
+ result = model.generate_detections_one_image(image)
98
+
99
+ detections_above_threshold = [d for d in result['detections'] if d['conf'] > 0.2]
100
+ print('Found {} detections above threshold'.format(len(detections_above_threshold)))
101
+ ```
102
+
103
+ ### Run MegaDetector on a folder of images
104
+
105
+ ```
106
+ from megadetector.detection.run_detector_batch import \
107
+ load_and_run_detector_batch, write_results_to_file
108
+ from megadetector.utils import path_utils
109
+ import os
110
+
111
+ # Pick a folder to run MD on recursively, and an output file
112
+ image_folder = os.path.expanduser('~/megadetector_test_images')
113
+ output_file = os.path.expanduser('~/megadetector_output_test.json')
114
+
115
+ # Recursively find images
116
+ image_file_names = path_utils.find_images(image_folder,recursive=True)
117
+
118
+ # This will automatically download MDv5a; you can also specify a filename.
119
+ results = load_and_run_detector_batch('MDV5A', image_file_names)
120
+
121
+ # Write results to a format that Timelapse and other downstream tools like.
122
+ write_results_to_file(results,
123
+ output_file,
124
+ relative_path_base=image_folder,
125
+ detector_file='MDV5A')
126
+ ```
127
+
128
+ ## Contact
129
+
130
+ Contact <a href="mailto:cameratraps@lila.science">cameratraps@lila.science</a> with questions.
131
+
132
+ ## Gratuitous animal picture
133
+
134
+ <img src="https://github.com/agentmorris/MegaDetector/raw/main/images/orinoquia-thumb-web_detections.jpg"><br/>Image credit University of Minnesota, from the [Orinoquía Camera Traps](http://lila.science/datasets/orinoquia-camera-traps/) data set.
@@ -84,7 +84,7 @@ megadetector/detection/video_utils.py,sha256=AlmNJ5n7qmv3Z65HcjI1ALAxXMmyTG3pUiO
84
84
  megadetector/postprocessing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
85
85
  megadetector/postprocessing/add_max_conf.py,sha256=9MYtsH2mwkiaZb7Qcor5J_HskfAj7d9srp8G_Qldpk0,1722
86
86
  megadetector/postprocessing/categorize_detections_by_size.py,sha256=DpZpRNFlyeOfWuOc6ICuENgIWDCEtiErJ_frBZp9lYM,5382
87
- megadetector/postprocessing/classification_postprocessing.py,sha256=klHi5OFQx0LTQntg384llZeSmrxUWEm2HJQmEU-sG8U,60710
87
+ megadetector/postprocessing/classification_postprocessing.py,sha256=TrhzjMWkoLs4ml0ObCKR7rUpprbT7sgS6RtyjPicj9A,68362
88
88
  megadetector/postprocessing/combine_batch_outputs.py,sha256=BEP8cVa0sMIPg7tkWQc_8vOEPnbmWjOsQdVJHe61uz8,8468
89
89
  megadetector/postprocessing/compare_batch_results.py,sha256=RDlKLwea76rOWiDneSJUj6P_oMBMnD2BY4inoxLqQiw,84258
90
90
  megadetector/postprocessing/convert_output_format.py,sha256=FiwKSiMyEeNVLLfjpQtx3CrMbchwNUaW2TgLmdXGFVo,14892
@@ -131,7 +131,7 @@ megadetector/utils/split_locations_into_train_val.py,sha256=fd_6pj1aWY6hybwaXvBn
131
131
  megadetector/utils/string_utils.py,sha256=r2Maw3zbzk3EyaZcNkdqr96yP_8m4ey6v0WxlemEY9U,6155
132
132
  megadetector/utils/url_utils.py,sha256=VWYDHbWctTtw7mvbb_A5DTdF3v9V2mWhBoOP5MGE5S8,25728
133
133
  megadetector/utils/wi_platform_utils.py,sha256=8CGpiox_aL6RVZKfJqPVwpW4_6Cjku0HIajJPcmeNpE,32019
134
- megadetector/utils/wi_taxonomy_utils.py,sha256=VTlWqZqNBplpvTvg7E_tCh88RdG_wf-6jE6ahU7GOuI,66173
134
+ megadetector/utils/wi_taxonomy_utils.py,sha256=vZ_UlRtyLpfF4-ehBt7HHjcj7PsI2dVWFz2tES9cxt4,66641
135
135
  megadetector/utils/write_html_image_list.py,sha256=6Tbe5wyUxoBYJgH9yVrxxKCeWF2BVre_wQMEOQJ-ZIU,9068
136
136
  megadetector/visualization/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
137
137
  megadetector/visualization/plot_utils.py,sha256=uDDlOhdaJ3V8sGj2kS9b0cgszKc8WCq2_ofl6TW_XUs,10727
@@ -140,8 +140,8 @@ megadetector/visualization/visualization_utils.py,sha256=E5uvysS3F1S_yiPFxZty3U2
140
140
  megadetector/visualization/visualize_db.py,sha256=8YDWSR0eMehXYdPtak9z8UUw35xV7hu-0eCuzgSLjWc,25558
141
141
  megadetector/visualization/visualize_detector_output.py,sha256=HpWh7ugwo51YBHsFi40iAp9G-uRAMMjgsm8H_uBolBs,20295
142
142
  megadetector/visualization/visualize_video_output.py,sha256=4A5uit_JVV46kZCsO6j0bZ5-o6ZTAlXKuVvvR_xWpho,20266
143
- megadetector-10.0.6.dist-info/licenses/LICENSE,sha256=RMa3qq-7Cyk7DdtqRj_bP1oInGFgjyHn9-PZ3PcrqIs,1100
144
- megadetector-10.0.6.dist-info/METADATA,sha256=KjAHAEEE_ELiV6BtcFx_bfjKJg9fxFFEOF_0uHtbp3s,6352
145
- megadetector-10.0.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
146
- megadetector-10.0.6.dist-info/top_level.txt,sha256=wf9DXa8EwiOSZ4G5IPjakSxBPxTDjhYYnqWRfR-zS4M,13
147
- megadetector-10.0.6.dist-info/RECORD,,
143
+ megadetector-10.0.7.dist-info/licenses/LICENSE,sha256=RMa3qq-7Cyk7DdtqRj_bP1oInGFgjyHn9-PZ3PcrqIs,1100
144
+ megadetector-10.0.7.dist-info/METADATA,sha256=TOU1IZ7EWaMp6D_11fpJNaH8_csJvVIXGynIol6flLc,6486
145
+ megadetector-10.0.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
146
+ megadetector-10.0.7.dist-info/top_level.txt,sha256=wf9DXa8EwiOSZ4G5IPjakSxBPxTDjhYYnqWRfR-zS4M,13
147
+ megadetector-10.0.7.dist-info/RECORD,,