megadetector 5.0.27__py3-none-any.whl → 5.0.28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26)
  1. megadetector/data_management/mewc_to_md.py +1 -1
  2. megadetector/data_management/read_exif.py +2 -0
  3. megadetector/detection/process_video.py +1 -1
  4. megadetector/detection/pytorch_detector.py +4 -4
  5. megadetector/detection/run_detector.py +10 -3
  6. megadetector/detection/run_detector_batch.py +4 -3
  7. megadetector/detection/run_tiled_inference.py +65 -13
  8. megadetector/detection/video_utils.py +2 -2
  9. megadetector/postprocessing/classification_postprocessing.py +517 -20
  10. megadetector/postprocessing/create_crop_folder.py +1 -1
  11. megadetector/postprocessing/generate_csv_report.py +499 -0
  12. megadetector/postprocessing/load_api_results.py +4 -4
  13. megadetector/postprocessing/postprocess_batch_results.py +6 -4
  14. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +0 -3
  15. megadetector/taxonomy_mapping/taxonomy_graph.py +1 -1
  16. megadetector/utils/ct_utils.py +3 -2
  17. megadetector/utils/path_utils.py +75 -29
  18. megadetector/utils/split_locations_into_train_val.py +16 -3
  19. megadetector/utils/wi_utils.py +68 -410
  20. megadetector/visualization/visualization_utils.py +25 -9
  21. megadetector/visualization/visualize_detector_output.py +50 -28
  22. {megadetector-5.0.27.dist-info → megadetector-5.0.28.dist-info}/METADATA +132 -132
  23. {megadetector-5.0.27.dist-info → megadetector-5.0.28.dist-info}/RECORD +26 -25
  24. {megadetector-5.0.27.dist-info → megadetector-5.0.28.dist-info}/WHEEL +1 -1
  25. {megadetector-5.0.27.dist-info → megadetector-5.0.28.dist-info}/licenses/LICENSE +0 -0
  26. {megadetector-5.0.27.dist-info → megadetector-5.0.28.dist-info}/top_level.txt +0 -0

megadetector/postprocessing/classification_postprocessing.py
@@ -13,6 +13,7 @@ Functions for postprocessing species classification results, particularly:

  #%% Constants and imports

+ import os
  import json
  import copy

@@ -20,10 +21,18 @@ from collections import defaultdict
  from tqdm import tqdm

  from megadetector.utils.ct_utils import is_list_sorted
+ from megadetector.utils.ct_utils import sort_dictionary_by_value
+ from megadetector.utils.ct_utils import sort_dictionary_by_key
+ from megadetector.utils.ct_utils import invert_dictionary
+
  from megadetector.utils.wi_utils import clean_taxonomy_string
  from megadetector.utils.wi_utils import taxonomy_level_index
  from megadetector.utils.wi_utils import taxonomy_level_string_to_index
- from megadetector.utils.ct_utils import sort_dictionary_by_value
+
+ from megadetector.utils.wi_utils import non_taxonomic_prediction_strings
+ from megadetector.utils.wi_utils import human_prediction_string
+ from megadetector.utils.wi_utils import animal_prediction_string
+ from megadetector.utils.wi_utils import blank_prediction_string


  #%% Options classes
@@ -140,22 +149,34 @@ def _sort_images_by_time(images):
      return sorted(images, key = lambda im: im['datetime'])


- def _count_detections_by_category(detections,options):
+ def count_detections_by_classification_category(detections,options=None):
      """
-     Count the number of instances of each category in the detections list
+     Count the number of instances of each classification category in the detections list
      [detections] that have an above-threshold detection. Sort results in descending
      order by count. Returns a dict mapping category ID --> count. If no detections
      are above threshold, returns an empty dict.

-     Assumes that if the 'classifications' field is present for a detection, it has
-     length 1, i.e. that non-top classifications have already been removed.
+     Only processes the top classification for each detection.
+
+     Args:
+         detections: detections list
+         options (ClassificationSmoothingOptions, optional): see ClassificationSmoothingOptions
+
+     Returns:
+         dict mapping above-threshold category IDs to counts
      """

+     if detections is None or len(detections) == 0:
+         return {}
+
+     if options is None:
+         options = ClassificationSmoothingOptions()
+
      category_to_count = defaultdict(int)

      for det in detections:
          if ('classifications' in det) and (det['conf'] >= options.detection_confidence_threshold):
-             assert len(det['classifications']) == 1
+             # assert len(det['classifications']) == 1
              c = det['classifications'][0]
              if c[1] >= options.classification_confidence_threshold:
                  category_to_count[c[0]] += 1
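
A minimal usage sketch for the newly public helper; the detections and category IDs below are hypothetical, and the exact counts returned depend on the default thresholds in ClassificationSmoothingOptions:

    from megadetector.postprocessing.classification_postprocessing import \
        count_detections_by_classification_category

    # Hypothetical MD-format detections; each classification is
    # [category_id, confidence]
    detections = [
        {'category': '1', 'conf': 0.92, 'classifications': [['17', 0.88]]},
        {'category': '1', 'conf': 0.85, 'classifications': [['17', 0.71]]},
        {'category': '1', 'conf': 0.10, 'classifications': [['23', 0.95]]}
    ]

    # Assuming the default detection threshold falls between 0.1 and 0.85
    # and the default classification threshold is below 0.71, the last
    # detection is ignored and this returns {'17': 2}
    category_to_count = count_detections_by_classification_category(detections)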
@@ -167,9 +188,16 @@ def _count_detections_by_category(detections,options):
      return category_to_count


- def _get_description_string(category_to_count,classification_descriptions):
+ def get_classification_description_string(category_to_count,classification_descriptions):
      """
      Return a string summarizing the image content according to [category_to_count].
+
+     Args:
+         category_to_count (dict): a dict mapping category IDs to counts
+         classification_descriptions (dict): a dict mapping category IDs to description strings
+
+     Returns:
+         string: a description of this image's content, e.g. "rabbit (4), human (1)"
      """

      category_strings = []
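
Continuing the sketch above, counts can be turned into the summary string the docstring describes; the description values here are simplified to bare names for illustration, whereas real MD output uses semicolon-delimited taxonomy strings:

    from megadetector.postprocessing.classification_postprocessing import \
        get_classification_description_string

    # Hypothetical category IDs and simplified descriptions
    classification_descriptions = {'17': 'rabbit', '23': 'human'}
    s = get_classification_description_string(
        {'17': 4, '23': 1}, classification_descriptions)
    # Expected to yield something like "rabbit (4), human (1)"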
@@ -202,7 +230,7 @@ def _print_counts_with_names(category_to_count,classification_descriptions):

  def _prepare_results_for_smoothing(input_file,options):
      """
-     Load results from [input_file] if necessary, prepare category descrptions
+     Load results from [input_file] if necessary, prepare category descriptions
      for smoothing. Adds pre-smoothing descriptions to every image if the options
      say we're supposed to do that.
      """
@@ -280,10 +308,10 @@ def _prepare_results_for_smoothing(input_file,options):
              continue

          detections = im['detections']
-         category_to_count = _count_detections_by_category(detections, options)
+         category_to_count = count_detections_by_classification_category(detections, options)

          im['pre_smoothing_description'] = \
-             _get_description_string(category_to_count, classification_descriptions)
+             get_classification_description_string(category_to_count, classification_descriptions)


      return {
@@ -316,9 +344,9 @@ def _smooth_classifications_for_list_of_detections(detections,

      ## Count the number of instances of each category in this image

-     category_to_count = _count_detections_by_category(detections, options)
+     category_to_count = count_detections_by_classification_category(detections, options)
      # _print_counts_with_names(category_to_count,classification_descriptions)
-     # _get_description_string(category_to_count, classification_descriptions)
+     # get_classification_description_string(category_to_count, classification_descriptions)

      if len(category_to_count) <= 1:
          return None
@@ -351,7 +379,7 @@ def _smooth_classifications_for_list_of_detections(detections,

      if verbose_debug_enabled:
          _print_counts_with_names(category_to_count,classification_descriptions)
-         import pdb; pdb.set_trace()
+         from IPython import embed; embed()


      ## Possibly change "other" classifications to the most common category
@@ -377,6 +405,11 @@ def _smooth_classifications_for_list_of_detections(detections,
                  if (c[1] >= options.classification_confidence_threshold) and \
                     (c[0] in other_category_ids):

+                     if verbose_debug_enabled:
+                         # c[1] is a confidence value, so the category being
+                         # swapped in is most_common_category, not c[1]
+                         print('Replacing {} with {}'.format(
+                             classification_descriptions[c[0]],
+                             classification_descriptions[most_common_category]))
+
                      n_other_classifications_changed_this_image += 1
                      c[0] = most_common_category

@@ -385,11 +418,14 @@ def _smooth_classifications_for_list_of_detections(detections,
          # ...for each detection

      # ...if we should overwrite all "other" classifications
+
+     if verbose_debug_enabled:
+         print('Made {} other changes'.format(n_other_classifications_changed_this_image))


      ## Re-count

-     category_to_count = _count_detections_by_category(detections, options)
+     category_to_count = count_detections_by_classification_category(detections, options)
      # _print_counts_with_names(category_to_count,classification_descriptions)
      keys = list(category_to_count.keys())
      max_count = category_to_count[keys[0]]
@@ -399,13 +435,18 @@ def _smooth_classifications_for_list_of_detections(detections,

      ## Possibly change some non-dominant classifications to the dominant category

+     process_taxonomic_rules = \
+         (classification_descriptions_clean is not None) and \
+         (len(classification_descriptions_clean) > 0) and \
+         (len(category_to_count) > 1)
+
      n_detections_flipped_this_image = 0

      # Don't do this if the most common category is an "other" category, or
      # if we don't have enough of the most common category
      if (most_common_category not in other_category_ids) and \
         (max_count >= options.min_detections_to_overwrite_secondary):
-
+
          # i_det = 0; det = detections[i_det]
          for i_det,det in enumerate(detections):

@@ -423,6 +464,32 @@ def _smooth_classifications_for_list_of_detections(detections,
              # Don't bother with below-threshold classifications
              if c[1] < options.classification_confidence_threshold:
                  continue
+
+             # If we're doing taxonomic processing, at this stage, don't turn children
+             # into parents; we'll likely turn parents into children in the next stage.
+
+             if process_taxonomic_rules:
+
+                 most_common_category_description = \
+                     classification_descriptions_clean[most_common_category]
+
+                 category_id_this_classification = c[0]
+                 assert category_id_this_classification in category_to_count
+
+                 category_description_this_classification = \
+                     classification_descriptions_clean[category_id_this_classification]
+
+                 # An empty description corresponds to the "animal" category. We don't
+                 # handle "animal" here as a parent category; that would be handled in
+                 # the "other smoothing" step above.
+                 if len(category_description_this_classification) == 0:
+                     continue
+
+                 most_common_category_is_parent_of_this_category = \
+                     most_common_category_description in category_description_this_classification
+
+                 if most_common_category_is_parent_of_this_category:
+                     continue

              # If we have fewer of this category than the most common category,
              # but not *too* many, flip it to the most common category.
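
The parent check above leans on the fact that a cleaned description (GUID and common name removed) of a parent taxon is a substring of its descendants' descriptions. A tiny illustration with invented strings:

    # Hypothetical cleaned taxonomy descriptions
    most_common_category_description = \
        'mammalia;lagomorpha;leporidae'                       # a family
    category_description_this_classification = \
        'mammalia;lagomorpha;leporidae;lepus;lepus europaeus'  # a species

    # The family string is contained in the species string, so the most
    # common category is treated as a parent of this classification, and
    # the classification is left alone at this stage
    assert most_common_category_description in category_description_this_classification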
@@ -436,10 +503,14 @@ def _smooth_classifications_for_list_of_detections(detections,

      # ...if the dominant category is legit

+     if verbose_debug_enabled:
+         print('Made {} non-dominant --> dominant changes'.format(
+             n_detections_flipped_this_image))
+

      ## Re-count

-     category_to_count = _count_detections_by_category(detections, options)
+     category_to_count = count_detections_by_classification_category(detections, options)
      # _print_counts_with_names(category_to_count,classification_descriptions)
      keys = list(category_to_count.keys())
      max_count = category_to_count[keys[0]]
@@ -449,8 +520,6 @@ def _smooth_classifications_for_list_of_detections(detections,

      ## Possibly collapse higher-level taxonomic predictions down to lower levels

-     # ...when the most common class is a child of a less common class.
-
      n_taxonomic_changes_this_image = 0

      process_taxonomic_rules = \
@@ -552,7 +621,7 @@ def _smooth_classifications_for_list_of_detections(detections,

      ## Re-count

-     category_to_count = _count_detections_by_category(detections, options)
+     category_to_count = count_detections_by_classification_category(detections, options)
      # _print_counts_with_names(category_to_count,classification_descriptions)
      keys = list(category_to_count.keys())
      max_count = category_to_count[keys[0]]
@@ -895,7 +964,7 @@ def smooth_classification_results_sequence_level(input_file,
          image_filenames_this_sequence = sequence_to_image_filenames[sequence_id]

          # if 'file' in image_filenames_this_sequence:
-         #     import pdb; pdb.set_trace()
+         #     from IPython import embed; embed()

          detections_this_sequence = []
          for image_filename in image_filenames_this_sequence:
@@ -978,3 +1047,431 @@ def smooth_classification_results_sequence_level(input_file,
      return d

  # ...smooth_classification_results_sequence_level(...)
+
+
+ def restrict_to_taxa_list(taxa_list,
+                           speciesnet_taxonomy_file,
+                           input_file,
+                           output_file,
+                           allow_walk_down=False,
+                           add_pre_filtering_description=True):
+     """
+     Given a prediction file in MD .json format, likely without having had
+     a geofence applied, apply a custom taxa list.
+
+     Args:
+         taxa_list (str or list): list of latin names, or a text file containing
+             a list of latin names. Optionally may contain a second (comma-delimited)
+             column containing common names, used only for debugging. Latin names
+             must exist in the SpeciesNet taxonomy.
+         speciesnet_taxonomy_file (str): taxonomy filename, in the same format used for
+             model release (with 7-token taxonomy entries)
+         input_file (str): .json file to read, in MD format. This can be None, in which
+             case this function just validates [taxa_list].
+         output_file (str): .json file to write, in MD format
+         allow_walk_down (bool, optional): should we walk down the taxonomy tree
+             when making mappings if a parent has only a single allowable child?
+             For example, if only a single felid species is allowed, should other
+             felid predictions be mapped to that species, as opposed to being mapped
+             to the family?
+         add_pre_filtering_description (bool, optional): should we add a new metadata
+             field that summarizes each image's classifications prior to taxonomic
+             restriction?
+     """
+
+     ##%% Read target taxa list
+
+     if isinstance(taxa_list,str):
+         assert os.path.isfile(taxa_list), \
+             'Could not find taxa list file {}'.format(taxa_list)
+         with open(taxa_list,'r') as f:
+             taxa_list = f.readlines()
+
+     taxa_list = [s.strip().lower() for s in taxa_list]
+     taxa_list = [s for s in taxa_list if len(s) > 0]
+
+     target_latin_to_common = {}
+     for s in taxa_list:
+         if s.strip().startswith('#'):
+             continue
+         tokens = s.split(',')
+         assert len(tokens) <= 2
+         binomial_name = tokens[0]
+         assert len(binomial_name.split(' ')) in (1,2,3), \
+             'Illegal binomial name in species list: {}'.format(binomial_name)
+         # A common name is only present if there was a second token
+         if len(tokens) > 1:
+             common_name = tokens[1].strip().lower()
+         else:
+             common_name = None
+         assert binomial_name not in target_latin_to_common
+         target_latin_to_common[binomial_name] = common_name
+
+
+     ##%% Read taxonomy file
+
+     with open(speciesnet_taxonomy_file,'r') as f:
+         speciesnet_taxonomy_list = f.readlines()
+     speciesnet_taxonomy_list = [s.strip() for s in \
+         speciesnet_taxonomy_list if len(s.strip()) > 0]
+
+     # Maps the latin name of every taxon to the corresponding full taxon string
+     #
+     # For species, the key is a binomial name
+     speciesnet_latin_name_to_taxon_string = {}
+     speciesnet_common_name_to_taxon_string = {}
+
+     def _insert_taxonomy_string(s):
+
+         tokens = s.split(';')
+         assert len(tokens) == 7
+
+         guid = tokens[0] # noqa
+         class_name = tokens[1]
+         order = tokens[2]
+         family = tokens[3]
+         genus = tokens[4]
+         species = tokens[5]
+         common_name = tokens[6]
+
+         if len(class_name) == 0:
+             assert common_name in ('animal','vehicle','blank')
+             return
+
+         if len(species) > 0:
+             assert all([len(s) > 0 for s in [genus,family,order]])
+             binomial_name = genus + ' ' + species
+             if binomial_name not in speciesnet_latin_name_to_taxon_string:
+                 speciesnet_latin_name_to_taxon_string[binomial_name] = s
+         elif len(genus) > 0:
+             assert all([len(s) > 0 for s in [family,order]])
+             if genus not in speciesnet_latin_name_to_taxon_string:
+                 speciesnet_latin_name_to_taxon_string[genus] = s
+         elif len(family) > 0:
+             assert len(order) > 0
+             if family not in speciesnet_latin_name_to_taxon_string:
+                 speciesnet_latin_name_to_taxon_string[family] = s
+         elif len(order) > 0:
+             if order not in speciesnet_latin_name_to_taxon_string:
+                 speciesnet_latin_name_to_taxon_string[order] = s
+         else:
+             if class_name not in speciesnet_latin_name_to_taxon_string:
+                 speciesnet_latin_name_to_taxon_string[class_name] = s
+
+         if len(common_name) > 0:
+             if common_name not in speciesnet_common_name_to_taxon_string:
+                 speciesnet_common_name_to_taxon_string[common_name] = s
+
+     for s in speciesnet_taxonomy_list:
+
+         _insert_taxonomy_string(s)
+
+
+     ##%% Make sure all parent taxa are represented in the taxonomy
+
+     # In theory any taxon that appears as the parent of another taxon should
+     # also be in the taxonomy, but this isn't always true, so we fix it here.
+
+     new_taxon_string_to_missing_tokens = defaultdict(list)
+
+     # latin_name = next(iter(speciesnet_latin_name_to_taxon_string.keys()))
+     for latin_name in speciesnet_latin_name_to_taxon_string.keys():
+
+         if 'no cv result' in latin_name:
+             continue
+
+         taxon_string = speciesnet_latin_name_to_taxon_string[latin_name]
+         tokens = taxon_string.split(';')
+
+         # Don't process GUID, species, or common name
+         # i_token = 6
+         for i_token in range(1,len(tokens)-2):
+
+             test_token = tokens[i_token]
+             if len(test_token) == 0:
+                 continue
+
+             # Do we need to make up a taxon for this token?
+             if test_token not in speciesnet_latin_name_to_taxon_string:
+
+                 new_tokens = [''] * 7
+                 new_tokens[0] = 'fake_guid'
+                 for i_copy_token in range(1,i_token+1):
+                     new_tokens[i_copy_token] = tokens[i_copy_token]
+                 new_tokens[-1] = test_token + ' species'
+                 assert new_tokens[-2] == ''
+                 new_taxon_string = ';'.join(new_tokens)
+                 # assert new_taxon_string not in new_taxon_strings
+                 new_taxon_string_to_missing_tokens[new_taxon_string].append(test_token)
+
+         # ...for each token
+
+     # ...for each taxon
+
+     print('Found {} taxa that need to be inserted to make the taxonomy valid:\n'.format(
+         len(new_taxon_string_to_missing_tokens)))
+
+     new_taxon_string_to_missing_tokens = \
+         sort_dictionary_by_key(new_taxon_string_to_missing_tokens)
+     for taxon_string in new_taxon_string_to_missing_tokens:
+         missing_taxa = ','.join(new_taxon_string_to_missing_tokens[taxon_string])
+         print('{} ({})'.format(taxon_string,missing_taxa))
+
+     for new_taxon_string in new_taxon_string_to_missing_tokens:
+         _insert_taxonomy_string(new_taxon_string)
+
+
+     ##%% Make sure all species on the allow-list are in the taxonomy
+
+     n_failed_mappings = 0
+
+     for target_taxon_latin_name in target_latin_to_common.keys():
+         if target_taxon_latin_name not in speciesnet_latin_name_to_taxon_string:
+             common_name = target_latin_to_common[target_taxon_latin_name]
+             s = '{} ({}) not in speciesnet taxonomy'.format(
+                 target_taxon_latin_name,common_name)
+             if common_name in speciesnet_common_name_to_taxon_string:
+                 s += ' (common name maps to {})'.format(
+                     speciesnet_common_name_to_taxon_string[common_name])
+             print(s)
+             n_failed_mappings += 1
+
+     if n_failed_mappings > 0:
+         raise ValueError('Cannot continue with geofence generation')
+
+
+     ##%% For the allow-list, map each parent taxon to a set of allowable child taxa
+
+     # Maps parent names to all allowed child names, or None if this is the
+     # lowest-level allowable taxon on this path
+     allowed_parent_taxon_to_child_taxa = defaultdict(set)
+
+     # latin_name = next(iter(target_latin_to_common.keys()))
+     for latin_name in target_latin_to_common:
+
+         taxon_string = speciesnet_latin_name_to_taxon_string[latin_name]
+         tokens = taxon_string.split(';')
+         assert len(tokens) == 7
+
+         # Remove GUID and common name
+         #
+         # This is now always class/order/family/genus/species
+         tokens = tokens[1:-1]
+
+         child_taxon = None
+
+         # If this is a species
+         if len(tokens[-1]) > 0:
+             binomial_name = tokens[-2] + ' ' + tokens[-1]
+             assert binomial_name == latin_name
+             allowed_parent_taxon_to_child_taxa[binomial_name].add(None)
+             child_taxon = binomial_name
+
+         # The first candidate parent is the genus
+         parent_token_index = len(tokens) - 2
+
+         while(parent_token_index >= 0):
+
+             parent_taxon = tokens[parent_token_index]
+             allowed_parent_taxon_to_child_taxa[parent_taxon].add(child_taxon)
+             child_taxon = parent_taxon
+             parent_token_index -= 1
+
+     # ...for each allowed latin name
+
+     allowed_parent_taxon_to_child_taxa = \
+         sort_dictionary_by_key(allowed_parent_taxon_to_child_taxa)
+
+
+     ##%% If we were just validating the custom taxa file, we're done
+
+     if input_file is None:
+         print('Finished validating custom taxonomy list')
+         return
+
+
+     ##%% Map all predictions that exist in this dataset...
+
+     # ...to the prediction we should generate.
+
+     with open(input_file,'r') as f:
+         input_data = json.load(f)
+
+     input_category_id_to_common_name = input_data['classification_categories'] #noqa
+     input_category_id_to_taxonomy_string = \
+         input_data['classification_category_descriptions']
+
+     input_category_id_to_output_taxon_string = {}
+
+     # input_category_id = next(iter(input_category_id_to_taxonomy_string.keys()))
+     for input_category_id in input_category_id_to_taxonomy_string.keys():
+
+         input_taxon_string = input_category_id_to_taxonomy_string[input_category_id]
+         input_taxon_tokens = input_taxon_string.split(';')
+         assert len(input_taxon_tokens) == 7
+
+         # Don't mess with blank/no-cv-result/animal/human
+         if (input_taxon_string in non_taxonomic_prediction_strings) or \
+            (input_taxon_string == human_prediction_string):
+             input_category_id_to_output_taxon_string[input_category_id] = \
+                 input_taxon_string
+             continue
+
+         # Remove GUID and common name
+
+         # This is now always class/order/family/genus/species
+         input_taxon_tokens = input_taxon_tokens[1:-1]
+
+         test_index = len(input_taxon_tokens) - 1
+         target_taxon = None
+
+         # Start at the species level, and see whether each taxon is allowed
+         while((test_index >= 0) and (target_taxon is None)):
+
+             # Species are represented as binomial names
+             if (test_index == (len(input_taxon_tokens) - 1)) and \
+                (len(input_taxon_tokens[-1]) > 0):
+                 test_taxon_name = \
+                     input_taxon_tokens[-2] + ' ' + input_taxon_tokens[-1]
+             else:
+                 test_taxon_name = input_taxon_tokens[test_index]
+
+             # If we haven't yet found the level at which this taxon is non-empty,
+             # keep going up
+             if len(test_taxon_name) == 0:
+                 test_index -= 1
+                 continue
+
+             assert test_taxon_name in speciesnet_latin_name_to_taxon_string
+
+             # Is this taxon allowed according to the custom species list?
+             if test_taxon_name in allowed_parent_taxon_to_child_taxa:
+
+                 allowed_child_taxa = allowed_parent_taxon_to_child_taxa[test_taxon_name]
+                 assert allowed_child_taxa is not None
+
+                 # If this is the lowest-level allowable token or there is not a
+                 # unique child, don't walk any further, even if walking down
+                 # is enabled.
+                 if (None in allowed_child_taxa):
+                     assert len(allowed_child_taxa) == 1
+
+                 if (None in allowed_child_taxa) or (len(allowed_child_taxa) > 1):
+                     target_taxon = test_taxon_name
+                 elif not allow_walk_down:
+                     target_taxon = test_taxon_name
+                 else:
+                     # If there's a unique child, walk back *down* the allowable
+                     # taxa until we run out of unique children
+                     while ((next(iter(allowed_child_taxa)) is not None) and \
+                            (len(allowed_child_taxa) == 1)):
+                         candidate_taxon = next(iter(allowed_child_taxa))
+                         assert candidate_taxon in allowed_parent_taxon_to_child_taxa
+                         assert candidate_taxon in speciesnet_latin_name_to_taxon_string
+                         allowed_child_taxa = \
+                             allowed_parent_taxon_to_child_taxa[candidate_taxon]
+                         target_taxon = candidate_taxon
+
+             # ...if this is an allowed taxon
+
+             test_index -= 1
+
+         # ...for each token
+
+         if target_taxon is None:
+             output_taxon_string = animal_prediction_string
+         else:
+             output_taxon_string = speciesnet_latin_name_to_taxon_string[target_taxon]
+         input_category_id_to_output_taxon_string[input_category_id] = output_taxon_string
+
+     # ...for each category
+
+
+     ##%% Build the new tables
+
+     input_category_id_to_output_category_id = {}
+     output_taxon_string_to_category_id = {}
+     output_category_id_to_common_name = {}
+
+     for input_category_id in input_category_id_to_output_taxon_string:
+
+         original_common_name = \
+             input_category_id_to_common_name[input_category_id]
+         original_taxon_string = \
+             input_category_id_to_taxonomy_string[input_category_id]
+         output_taxon_string = \
+             input_category_id_to_output_taxon_string[input_category_id]
+
+         output_common_name = output_taxon_string.split(';')[-1]
+
+         # Do we need to create a new output category?
+         if output_taxon_string not in output_taxon_string_to_category_id:
+             output_category_id = str(len(output_taxon_string_to_category_id))
+             output_taxon_string_to_category_id[output_taxon_string] = \
+                 output_category_id
+             output_category_id_to_common_name[output_category_id] = \
+                 output_common_name
+         else:
+             output_category_id = \
+                 output_taxon_string_to_category_id[output_taxon_string]
+
+         input_category_id_to_output_category_id[input_category_id] = \
+             output_category_id
+
+         if False:
+             print('Mapping {} ({}) to:\n{} ({})\n'.format(
+                 original_common_name,original_taxon_string,
+                 output_common_name,output_taxon_string))
+         if False:
+             print('Mapping {} to {}'.format(
+                 original_common_name,output_common_name))
+
+     # ...for each category
+
+
+     ##%% Remap all category labels
+
+     assert len(set(output_taxon_string_to_category_id.keys())) == \
+            len(set(output_taxon_string_to_category_id.values()))
+
+     output_category_id_to_taxon_string = \
+         invert_dictionary(output_taxon_string_to_category_id)
+
+     with open(input_file,'r') as f:
+         output_data = json.load(f)
+
+     classification_descriptions = None
+     if 'classification_category_descriptions' in output_data:
+         classification_descriptions = output_data['classification_category_descriptions']
+
+     for im in tqdm(output_data['images']):
+
+         if 'detections' not in im or im['detections'] is None:
+             continue
+
+         # Possibly prepare a pre-filtering description
+         pre_filtering_description = None
+         if classification_descriptions is not None and add_pre_filtering_description:
+             category_to_count = count_detections_by_classification_category(im['detections'])
+             pre_filtering_description = \
+                 get_classification_description_string(category_to_count,classification_descriptions)
+             im['pre_filtering_description'] = pre_filtering_description
+
+         for det in im['detections']:
+             if 'classifications' in det:
+                 for classification in det['classifications']:
+                     classification[0] = \
+                         input_category_id_to_output_category_id[classification[0]]
+
+     # ...for each image
+
+     output_data['classification_categories'] = output_category_id_to_common_name
+     output_data['classification_category_descriptions'] = \
+         output_category_id_to_taxon_string
+
+
+     ##%% Write output
+
+     with open(output_file,'w') as f:
+         json.dump(output_data,f,indent=1)
+
+ # ...def restrict_to_taxa_list(...)
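
A usage sketch for the new function; the paths and list contents below are hypothetical, and the taxa list format follows the docstring (one latin name per line, optionally followed by a comma and a common name):

    from megadetector.postprocessing.classification_postprocessing import \
        restrict_to_taxa_list

    # allowed_taxa.txt (hypothetical) might contain:
    #
    #   # comment lines are ignored
    #   lepus europaeus,european hare
    #   vulpes vulpes,red fox
    #   cervidae

    restrict_to_taxa_list(taxa_list='allowed_taxa.txt',
                          speciesnet_taxonomy_file='taxonomy_release.txt',
                          input_file='results.json',
                          output_file='results_restricted.json')

    # With input_file=None, the function just validates the taxa list
    # against the taxonomy and returns
    restrict_to_taxa_list('allowed_taxa.txt', 'taxonomy_release.txt', None, None)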
megadetector/postprocessing/create_crop_folder.py

@@ -130,7 +130,7 @@ def crop_results_to_image_results(image_results_file_with_crop_ids,
              containing crop IDs, likely generated via create_crop_folder. All
              non-standard fields in this file will be passed along to [output_file].
          crop_results_file (str): results file for the crop folder
-         output_file (str): ouptut .json file, containing crop-level classifications
+         output_file (str): output .json file, containing crop-level classifications
              mapped back to the image level.
          delete_crop_information (bool, optional): whether to delete the "crop_id" and
              "crop_filename_relative" fields from each detection, if present.