megadetector 5.0.26__py3-none-any.whl → 5.0.28__py3-none-any.whl
This diff represents the content of publicly available package versions released to a supported registry, and is provided for informational purposes only; it reflects the changes between the two versions as they appear in that registry.
- megadetector/data_management/mewc_to_md.py +1 -1
- megadetector/data_management/read_exif.py +2 -0
- megadetector/detection/process_video.py +1 -1
- megadetector/detection/pytorch_detector.py +4 -4
- megadetector/detection/run_detector.py +10 -3
- megadetector/detection/run_detector_batch.py +4 -3
- megadetector/detection/run_tiled_inference.py +65 -13
- megadetector/detection/video_utils.py +2 -2
- megadetector/postprocessing/classification_postprocessing.py +517 -20
- megadetector/postprocessing/create_crop_folder.py +1 -1
- megadetector/postprocessing/generate_csv_report.py +499 -0
- megadetector/postprocessing/load_api_results.py +4 -4
- megadetector/postprocessing/postprocess_batch_results.py +6 -4
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +0 -3
- megadetector/taxonomy_mapping/taxonomy_graph.py +1 -1
- megadetector/utils/ct_utils.py +3 -2
- megadetector/utils/path_utils.py +75 -29
- megadetector/utils/split_locations_into_train_val.py +16 -3
- megadetector/utils/wi_utils.py +68 -410
- megadetector/visualization/visualization_utils.py +25 -9
- megadetector/visualization/visualize_detector_output.py +50 -28
- {megadetector-5.0.26.dist-info → megadetector-5.0.28.dist-info}/METADATA +132 -132
- {megadetector-5.0.26.dist-info → megadetector-5.0.28.dist-info}/RECORD +26 -25
- {megadetector-5.0.26.dist-info → megadetector-5.0.28.dist-info}/WHEEL +1 -1
- {megadetector-5.0.26.dist-info → megadetector-5.0.28.dist-info}/licenses/LICENSE +0 -0
- {megadetector-5.0.26.dist-info → megadetector-5.0.28.dist-info}/top_level.txt +0 -0
--- a/megadetector/postprocessing/classification_postprocessing.py
+++ b/megadetector/postprocessing/classification_postprocessing.py
@@ -13,6 +13,7 @@ Functions for postprocessing species classification results, particularly:
 
 #%% Constants and imports
 
+import os
 import json
 import copy
 
@@ -20,10 +21,18 @@ from collections import defaultdict
 from tqdm import tqdm
 
 from megadetector.utils.ct_utils import is_list_sorted
+from megadetector.utils.ct_utils import sort_dictionary_by_value
+from megadetector.utils.ct_utils import sort_dictionary_by_key
+from megadetector.utils.ct_utils import invert_dictionary
+
 from megadetector.utils.wi_utils import clean_taxonomy_string
 from megadetector.utils.wi_utils import taxonomy_level_index
 from megadetector.utils.wi_utils import taxonomy_level_string_to_index
-
+
+from megadetector.utils.wi_utils import non_taxonomic_prediction_strings
+from megadetector.utils.wi_utils import human_prediction_string
+from megadetector.utils.wi_utils import animal_prediction_string
+from megadetector.utils.wi_utils import blank_prediction_string
 
 
 #%% Options classes
@@ -140,22 +149,34 @@ def _sort_images_by_time(images):
     return sorted(images, key = lambda im: im['datetime'])
 
 
-def _count_detections_by_category(detections,options):
+def count_detections_by_classification_category(detections,options=None):
     """
-    Count the number of instances of each category in the detections list
+    Count the number of instances of each classification category in the detections list
     [detections] that have an above-threshold detection. Sort results in descending
     order by count. Returns a dict mapping category ID --> count. If no detections
     are above threshold, returns an empty dict.
 
-
-
+    Only processes the top classification for each detection.
+
+    Args:
+        detections: detections list
+        options (ClassificationSmoothingOptions, optional): see ClassificationSmoothingOptions
+
+    Returns:
+        dict mapping above-threshold category IDs to counts
     """
 
+    if detections is None or len(detections) == 0:
+        return {}
+
+    if options is None:
+        options = ClassificationSmoothingOptions()
+
     category_to_count = defaultdict(int)
 
     for det in detections:
         if ('classifications' in det) and (det['conf'] >= options.detection_confidence_threshold):
-            assert len(det['classifications']) == 1
+            # assert len(det['classifications']) == 1
             c = det['classifications'][0]
             if c[1] >= options.classification_confidence_threshold:
                 category_to_count[c[0]] += 1
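For context, here is a minimal usage sketch of the renamed helper. The detections, category IDs, and confidence values are invented for illustration, and the exact cutoffs come from the ClassificationSmoothingOptions defaults:

    from megadetector.postprocessing.classification_postprocessing import \
        count_detections_by_classification_category

    # Detections in MD .json format; each classification is a
    # [category_id, confidence] pair, and only the top classification
    # per detection is counted
    detections = [
        {'category': '1', 'conf': 0.90, 'classifications': [['3', 0.80]]},
        {'category': '1', 'conf': 0.85, 'classifications': [['3', 0.70]]},
        {'category': '1', 'conf': 0.05, 'classifications': [['4', 0.90]]}
    ]

    # Assuming the 0.05 detection falls below the default detection
    # threshold, this prints something like {'3': 2}
    print(count_detections_by_classification_category(detections))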
@@ -167,9 +188,16 @@ def _count_detections_by_category(detections,options):
     return category_to_count
 
 
-def
+def get_classification_description_string(category_to_count,classification_descriptions):
     """
     Return a string summarizing the image content according to [category_to_count].
+
+    Args:
+        category_to_count (dict): a dict mapping category IDs to counts
+        classification_descriptions (dict): a dict mapping category IDs to description strings
+
+    Returns:
+        string: a description of this image's content, e.g. "rabbit (4), human (1)"
     """
 
     category_strings = []
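The summary format is easiest to see with a small example; the category IDs and 7-token taxonomy strings here are invented, and the expected output follows the docstring:

    from megadetector.postprocessing.classification_postprocessing import \
        get_classification_description_string

    # Counts as returned by count_detections_by_classification_category
    category_to_count = {'51': 4, '20': 1}

    # Descriptions use the GUID;class;order;family;genus;species;common_name format
    classification_descriptions = {
        '51': 'guid1;mammalia;lagomorpha;leporidae;oryctolagus;cuniculus;rabbit',
        '20': 'guid2;mammalia;primates;hominidae;homo;sapiens;human'
    }

    # Per the docstring, this yields a string like "rabbit (4), human (1)"
    print(get_classification_description_string(category_to_count,
                                                classification_descriptions))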
@@ -202,7 +230,7 @@ def _print_counts_with_names(category_to_count,classification_descriptions):
 
 def _prepare_results_for_smoothing(input_file,options):
     """
-    Load results from [input_file] if necessary, prepare category
+    Load results from [input_file] if necessary, prepare category descriptions
     for smoothing. Adds pre-smoothing descriptions to every image if the options
     say we're supposed to do that.
     """
@@ -280,10 +308,10 @@ def _prepare_results_for_smoothing(input_file,options):
             continue
 
         detections = im['detections']
-        category_to_count = _count_detections_by_category(detections,options)
+        category_to_count = count_detections_by_classification_category(detections, options)
 
         im['pre_smoothing_description'] = \
-
+            get_classification_description_string(category_to_count, classification_descriptions)
 
 
     return {
@@ -316,9 +344,9 @@ def _smooth_classifications_for_list_of_detections(detections,
 
     ## Count the number of instances of each category in this image
 
-    category_to_count = _count_detections_by_category(detections,options)
+    category_to_count = count_detections_by_classification_category(detections, options)
     # _print_counts_with_names(category_to_count,classification_descriptions)
-    #
+    # get_classification_description_string(category_to_count, classification_descriptions)
 
     if len(category_to_count) <= 1:
         return None
@@ -351,7 +379,7 @@ def _smooth_classifications_for_list_of_detections(detections,
 
     if verbose_debug_enabled:
         _print_counts_with_names(category_to_count,classification_descriptions)
-        import
+        from IPython import embed; embed()
 
 
     ## Possibly change "other" classifications to the most common category
@@ -377,6 +405,11 @@ def _smooth_classifications_for_list_of_detections(detections,
             if (c[1] >= options.classification_confidence_threshold) and \
                (c[0] in other_category_ids):
 
+                if verbose_debug_enabled:
+                    print('Replacing {} with {}'.format(
+                        classification_descriptions[c[0]],
+                        classification_descriptions[c[1]]))
+
                 n_other_classifications_changed_this_image += 1
                 c[0] = most_common_category
 
@@ -385,11 +418,14 @@ def _smooth_classifications_for_list_of_detections(detections,
     # ...for each detection
 
     # ...if we should overwrite all "other" classifications
+
+    if verbose_debug_enabled:
+        print('Made {} other changes'.format(n_other_classifications_changed_this_image))
 
 
     ## Re-count
 
-    category_to_count = _count_detections_by_category(detections,options)
+    category_to_count = count_detections_by_classification_category(detections, options)
     # _print_counts_with_names(category_to_count,classification_descriptions)
     keys = list(category_to_count.keys())
     max_count = category_to_count[keys[0]]
@@ -399,13 +435,18 @@ def _smooth_classifications_for_list_of_detections(detections,
 
     ## Possibly change some non-dominant classifications to the dominant category
 
+    process_taxonomic_rules = \
+        (classification_descriptions_clean is not None) and \
+        (len(classification_descriptions_clean) > 0) and \
+        (len(category_to_count) > 1)
+
     n_detections_flipped_this_image = 0
 
     # Don't do this if the most common category is an "other" category, or
    # if we don't have enough of the most common category
     if (most_common_category not in other_category_ids) and \
        (max_count >= options.min_detections_to_overwrite_secondary):
-
+
         # i_det = 0; det = detections[i_det]
         for i_det,det in enumerate(detections):
 
@@ -423,6 +464,32 @@ def _smooth_classifications_for_list_of_detections(detections,
             # Don't bother with below-threshold classifications
             if c[1] < options.classification_confidence_threshold:
                 continue
+
+            # If we're doing taxonomic processing, at this stage, don't turn children
+            # into parents; we'll likely turn parents into children in the next stage.
+
+            if process_taxonomic_rules:
+
+                most_common_category_description = \
+                    classification_descriptions_clean[most_common_category]
+
+                category_id_this_classification = c[0]
+                assert category_id_this_classification in category_to_count
+
+                category_description_this_classification = \
+                    classification_descriptions_clean[category_id_this_classification]
+
+                # An empty description corresponds to the "animal" category. We don't handle
+                # "animal" here as a parent category, that would be handled in the "other smoothing"
+                # step above.
+                if len(category_description_this_classification) == 0:
+                    continue
+
+                most_common_category_is_parent_of_this_category = \
+                    most_common_category_description in category_description_this_classification
+
+                if most_common_category_is_parent_of_this_category:
+                    continue
 
             # If we have fewer of this category than the most common category,
             # but not *too* many, flip it to the most common category.
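The parent test in this hunk relies on the cleaned taxonomy descriptions being semicolon-delimited paths, so an ancestor's cleaned description appears verbatim inside each descendant's description. A minimal sketch with invented strings:

    # Cleaned description of the dominant (family-level) category
    most_common_category_description = 'mammalia;carnivora;felidae'

    # Cleaned description of a species-level classification on the same image
    category_description_this_classification = \
        'mammalia;carnivora;felidae;lynx;lynx rufus'

    # True: the dominant category is a parent of this classification, so the
    # loop above leaves the child prediction alone at this stage
    print(most_common_category_description in
          category_description_this_classification)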
@@ -436,10 +503,14 @@ def _smooth_classifications_for_list_of_detections(detections,
 
     # ...if the dominant category is legit
 
+    if verbose_debug_enabled:
+        print('Made {} non-dominant --> dominant changes'.format(
+            n_detections_flipped_this_image))
+
 
     ## Re-count
 
-    category_to_count = _count_detections_by_category(detections,options)
+    category_to_count = count_detections_by_classification_category(detections, options)
     # _print_counts_with_names(category_to_count,classification_descriptions)
     keys = list(category_to_count.keys())
     max_count = category_to_count[keys[0]]
@@ -449,8 +520,6 @@ def _smooth_classifications_for_list_of_detections(detections,
 
     ## Possibly collapse higher-level taxonomic predictions down to lower levels
 
-    # ...when the most common class is a child of a less common class.
-
     n_taxonomic_changes_this_image = 0
 
     process_taxonomic_rules = \
@@ -552,7 +621,7 @@ def _smooth_classifications_for_list_of_detections(detections,
 
     ## Re-count
 
-    category_to_count = _count_detections_by_category(detections,options)
+    category_to_count = count_detections_by_classification_category(detections, options)
     # _print_counts_with_names(category_to_count,classification_descriptions)
     keys = list(category_to_count.keys())
     max_count = category_to_count[keys[0]]
@@ -895,7 +964,7 @@ def smooth_classification_results_sequence_level(input_file,
         image_filenames_this_sequence = sequence_to_image_filenames[sequence_id]
 
         # if 'file' in image_filenames_this_sequence:
-        #     import
+        #     from IPython import embed; embed()
 
         detections_this_sequence = []
         for image_filename in image_filenames_this_sequence:
@@ -978,3 +1047,431 @@ def smooth_classification_results_sequence_level(input_file,
     return d
 
 # ...smooth_classification_results_sequence_level(...)
+
+
+def restrict_to_taxa_list(taxa_list,
+                          speciesnet_taxonomy_file,
+                          input_file,
+                          output_file,
+                          allow_walk_down=False,
+                          add_pre_filtering_description=True):
+    """
+    Given a prediction file in MD .json format, likely without having had
+    a geofence applied, apply a custom taxa list.
+
+    Args:
+        taxa_list (str or list): list of latin names, or a text file containing
+            a list of latin names. Optionally may contain a second (comma-delimited)
+            column containing common names, used only for debugging. Latin names
+            must exist in the SpeciesNet taxonomy.
+        speciesnet_taxonomy_file (str): taxonomy filename, in the same format used for
+            model release (with 7-token taxonomy entries)
+        input_file (str): .json file to read, in MD format. This can be None, in which
+            case this function just validates [taxa_list].
+        output_file (str): .json file to write, in MD format
+        allow_walk_down (bool, optional): should we walk down the taxonomy tree
+            when making mappings if a parent has only a single allowable child?
+            For example, if only a single felid species is allowed, should other
+            felid predictions be mapped to that species, as opposed to being mapped
+            to the family?
+        add_pre_restriction_description (bool, optional): should we add a new metadata
+            field that summarizes each image's classifications prior to taxonomic
+            restriction?
+    """
+
+    ##%% Read target taxa list
+
+    if isinstance(taxa_list,str):
+        assert os.path.isfile(taxa_list), \
+            'Could not find taxa list file {}'.format(taxa_list)
+        with open(taxa_list,'r') as f:
+            taxa_list = f.readlines()
+
+    taxa_list = [s.strip().lower() for s in taxa_list]
+    taxa_list = [s for s in taxa_list if len(s) > 0]
+
+    target_latin_to_common = {}
+    for s in taxa_list:
+        if s.strip().startswith('#'):
+            continue
+        tokens = s.split(',')
+        assert len(tokens) <= 2
+        binomial_name = tokens[0]
+        assert len(binomial_name.split(' ')) in (1,2,3), \
+            'Illegal binomial name in species list: {}'.format(binomial_name)
+        if len(tokens) > 0:
+            common_name = tokens[1].strip().lower()
+        else:
+            common_name = None
+        assert binomial_name not in target_latin_to_common
+        target_latin_to_common[binomial_name] = common_name
+
+
+    ##%% Read taxonomy file
+
+    with open(speciesnet_taxonomy_file,'r') as f:
+        speciesnet_taxonomy_list = f.readlines()
+    speciesnet_taxonomy_list = [s.strip() for s in \
+                                speciesnet_taxonomy_list if len(s.strip()) > 0]
+
+    # Maps the latin name of every taxon to the corresponding full taxon string
+    #
+    # For species, the key is a binomial name
+    speciesnet_latin_name_to_taxon_string = {}
+    speciesnet_common_name_to_taxon_string = {}
+
+    def _insert_taxonomy_string(s):
+
+        tokens = s.split(';')
+        assert len(tokens) == 7
+
+        guid = tokens[0] # noqa
+        class_name = tokens[1]
+        order = tokens[2]
+        family = tokens[3]
+        genus = tokens[4]
+        species = tokens[5]
+        common_name = tokens[6]
+
+        if len(class_name) == 0:
+            assert common_name in ('animal','vehicle','blank')
+            return
+
+        if len(species) > 0:
+            assert all([len(s) > 0 for s in [genus,family,order]])
+            binomial_name = genus + ' ' + species
+            if binomial_name not in speciesnet_latin_name_to_taxon_string:
+                speciesnet_latin_name_to_taxon_string[binomial_name] = s
+        elif len(genus) > 0:
+            assert all([len(s) > 0 for s in [family,order]])
+            if genus not in speciesnet_latin_name_to_taxon_string:
+                speciesnet_latin_name_to_taxon_string[genus] = s
+        elif len(family) > 0:
+            assert len(order) > 0
+            if family not in speciesnet_latin_name_to_taxon_string:
+                speciesnet_latin_name_to_taxon_string[family] = s
+        elif len(order) > 0:
+            if order not in speciesnet_latin_name_to_taxon_string:
+                speciesnet_latin_name_to_taxon_string[order] = s
+        else:
+            if class_name not in speciesnet_latin_name_to_taxon_string:
+                speciesnet_latin_name_to_taxon_string[class_name] = s
+
+        if len(common_name) > 0:
+            if common_name not in speciesnet_common_name_to_taxon_string:
+                speciesnet_common_name_to_taxon_string[common_name] = s
+
+    for s in speciesnet_taxonomy_list:
+
+        _insert_taxonomy_string(s)
+
+
+    ##%% Make sure all parent taxa are represented in the taxonomy
+
+    # In theory any taxon that appears as the parent of another taxon should
+    # also be in the taxonomy, but this isn't always true, so we fix it here.
+
+    new_taxon_string_to_missing_tokens = defaultdict(list)
+
+    # latin_name = next(iter(speciesnet_latin_name_to_taxon_string.keys()))
+    for latin_name in speciesnet_latin_name_to_taxon_string.keys():
+
+        if 'no cv result' in latin_name:
+            continue
+
+        taxon_string = speciesnet_latin_name_to_taxon_string[latin_name]
+        tokens = taxon_string.split(';')
+
+        # Don't process GUID, species, or common name
+        # i_token = 6
+        for i_token in range(1,len(tokens)-2):
+
+            test_token = tokens[i_token]
+            if len(test_token) == 0:
+                continue
+
+            # Do we need to make up a taxon for this token?
+            if test_token not in speciesnet_latin_name_to_taxon_string:
+
+                new_tokens = [''] * 7
+                new_tokens[0] = 'fake_guid'
+                for i_copy_token in range(1,i_token+1):
+                    new_tokens[i_copy_token] = tokens[i_copy_token]
+                new_tokens[-1] = test_token + ' species'
+                assert new_tokens[-2] == ''
+                new_taxon_string = ';'.join(new_tokens)
+                # assert new_taxon_string not in new_taxon_strings
+                new_taxon_string_to_missing_tokens[new_taxon_string].append(test_token)
+
+        # ...for each token
+
+    # ...for each taxon
+
+    print('Found {} taxa that need to be inserted to make the taxonomy valid:\n'.format(
+        len(new_taxon_string_to_missing_tokens)))
+
+    new_taxon_string_to_missing_tokens = \
+        sort_dictionary_by_key(new_taxon_string_to_missing_tokens)
+    for taxon_string in new_taxon_string_to_missing_tokens:
+        missing_taxa = ','.join(new_taxon_string_to_missing_tokens[taxon_string])
+        print('{} ({})'.format(taxon_string,missing_taxa))
+
+    for new_taxon_string in new_taxon_string_to_missing_tokens:
+        _insert_taxonomy_string(new_taxon_string)
+
+
+    ##%% Make sure all species on the allow-list are in the taxonomy
+
+    n_failed_mappings = 0
+
+    for target_taxon_latin_name in target_latin_to_common.keys():
+        if target_taxon_latin_name not in speciesnet_latin_name_to_taxon_string:
+            common_name = target_latin_to_common[target_taxon_latin_name]
+            s = '{} ({}) not in speciesnet taxonomy'.format(
+                target_taxon_latin_name,common_name)
+            if common_name in speciesnet_common_name_to_taxon_string:
+                s += ' (common name maps to {})'.format(
+                    speciesnet_common_name_to_taxon_string[common_name])
+            print(s)
+            n_failed_mappings += 1
+
+    if n_failed_mappings > 0:
+        raise ValueError('Cannot continue with geofence generation')
+
+
+    ##%% For the allow-list, map each parent taxon to a set of allowable child taxa
+
+    # Maps parent names to all allowed child names, or None if this is the
+    # lowest-level allowable taxon on this path
+    allowed_parent_taxon_to_child_taxa = defaultdict(set)
+
+    # latin_name = next(iter(target_latin_to_common.keys()))
+    for latin_name in target_latin_to_common:
+
+        taxon_string = speciesnet_latin_name_to_taxon_string[latin_name]
+        tokens = taxon_string.split(';')
+        assert len(tokens) == 7
+
+        # Remove GUID and common mame
+        #
+        # This is now always class/order/family/genus/species
+        tokens = tokens[1:-1]
+
+        child_taxon = None
+
+        # If this is a species
+        if len(tokens[-1]) > 0:
+            binomial_name = tokens[-2] + ' ' + tokens[-1]
+            assert binomial_name == latin_name
+            allowed_parent_taxon_to_child_taxa[binomial_name].add(None)
+            child_taxon = binomial_name
+
+        # The first candidate parent is the genus
+        parent_token_index = len(tokens) - 2
+
+        while(parent_token_index >= 0):
+
+            parent_taxon = tokens[parent_token_index]
+            allowed_parent_taxon_to_child_taxa[parent_taxon].add(child_taxon)
+            child_taxon = parent_taxon
+            parent_token_index -= 1
+
+    # ...for each allowed latin name
+
+    allowed_parent_taxon_to_child_taxa = \
+        sort_dictionary_by_key(allowed_parent_taxon_to_child_taxa)
+
+
+    ##%% If we were just validating the custom taxa file, we're done
+
+    if input_file is None:
+        print('Finished validating custom taxonomy list')
+        return
+
+
+    ##%% Map all predictions that exist in this dataset...
+
+    # ...to the prediction we should generate.
+
+    with open(input_file,'r') as f:
+        input_data = json.load(f)
+
+    input_category_id_to_common_name = input_data['classification_categories'] #noqa
+    input_category_id_to_taxonomy_string = \
+        input_data['classification_category_descriptions']
+
+    input_category_id_to_output_taxon_string = {}
+
+    # input_category_id = next(iter(input_category_id_to_taxonomy_string.keys()))
+    for input_category_id in input_category_id_to_taxonomy_string.keys():
+
+        input_taxon_string = input_category_id_to_taxonomy_string[input_category_id]
+        input_taxon_tokens = input_taxon_string.split(';')
+        assert len(input_taxon_tokens) == 7
+
+        # Don't mess with blank/no-cv-result/animal/human
+        if (input_taxon_string in non_taxonomic_prediction_strings) or \
+           (input_taxon_string == human_prediction_string):
+            input_category_id_to_output_taxon_string[input_category_id] = \
+                input_taxon_string
+            continue
+
+        # Remove GUID and common mame
+        #
+        # This is now always class/order/family/genus/species
+        input_taxon_tokens = input_taxon_tokens[1:-1]
+
+        test_index = len(input_taxon_tokens) - 1
+        target_taxon = None
+
+        # Start at the species level, and see whether each taxon is allowed
+        while((test_index >= 0) and (target_taxon is None)):
+
+            # Species are represented as binomial names
+            if (test_index == (len(input_taxon_tokens) - 1)) and \
+               (len(input_taxon_tokens[-1]) > 0):
+                test_taxon_name = \
+                    input_taxon_tokens[-2] + ' ' + input_taxon_tokens[-1]
+            else:
+                test_taxon_name = input_taxon_tokens[test_index]
+
+            # If we haven't yet found the level at which this taxon is non-empty,
+            # keep going up
+            if len(test_taxon_name) == 0:
+                test_index -= 1
+                continue
+
+            assert test_taxon_name in speciesnet_latin_name_to_taxon_string
+
+            # Is this taxon allowed according to the custom species list?
+            if test_taxon_name in allowed_parent_taxon_to_child_taxa:
+
+                allowed_child_taxa = allowed_parent_taxon_to_child_taxa[test_taxon_name]
+                assert allowed_child_taxa is not None
+
+                # If this is the lowest-level allowable token or there is not a
+                # unique child, don't walk any further, even if walking down
+                # is enabled.
+                if (None in allowed_child_taxa):
+                    assert len(allowed_child_taxa) == 1
+
+                if (None in allowed_child_taxa) or (len(allowed_child_taxa) > 1):
+                    target_taxon = test_taxon_name
+                elif not allow_walk_down:
+                    target_taxon = test_taxon_name
+                else:
+                    # If there's a unique child, walk back *down* the allowable
+                    # taxa until we run out of unique children
+                    while ((next(iter(allowed_child_taxa)) is not None) and \
+                           (len(allowed_child_taxa) == 1)):
+                        candidate_taxon = next(iter(allowed_child_taxa))
+                        assert candidate_taxon in allowed_parent_taxon_to_child_taxa
+                        assert candidate_taxon in speciesnet_latin_name_to_taxon_string
+                        allowed_child_taxa = \
+                            allowed_parent_taxon_to_child_taxa[candidate_taxon]
+                        target_taxon = candidate_taxon
+
+            # ...if this is an allowed taxon
+
+            test_index -= 1
+
+        # ...for each token
+
+        if target_taxon is None:
+            output_taxon_string = animal_prediction_string
+        else:
+            output_taxon_string = speciesnet_latin_name_to_taxon_string[target_taxon]
+        input_category_id_to_output_taxon_string[input_category_id] = output_taxon_string
+
+    # ...for each category
+
+
+    ##%% Build the new tables
+
+    input_category_id_to_output_category_id = {}
+    output_taxon_string_to_category_id = {}
+    output_category_id_to_common_name = {}
+
+    for input_category_id in input_category_id_to_output_taxon_string:
+
+        original_common_name = \
+            input_category_id_to_common_name[input_category_id]
+        original_taxon_string = \
+            input_category_id_to_taxonomy_string[input_category_id]
+        output_taxon_string = \
+            input_category_id_to_output_taxon_string[input_category_id]
+
+        output_common_name = output_taxon_string.split(';')[-1]
+
+        # Do we need to create a new output category?
+        if output_taxon_string not in output_taxon_string_to_category_id:
+            output_category_id = str(len(output_taxon_string_to_category_id))
+            output_taxon_string_to_category_id[output_taxon_string] = \
+                output_category_id
+            output_category_id_to_common_name[output_category_id] = \
+                output_common_name
+        else:
+            output_category_id = \
+                output_taxon_string_to_category_id[output_taxon_string]
+
+        input_category_id_to_output_category_id[input_category_id] = \
+            output_category_id
+
+        if False:
+            print('Mapping {} ({}) to:\n{} ({})\n'.format(
+                original_common_name,original_taxon_string,
+                output_common_name,output_taxon_string))
+        if False:
+            print('Mapping {} to {}'.format(
+                original_common_name,output_common_name,))
+
+    # ...for each category
+
+
+    ##%% Remap all category labels
+
+    assert len(set(output_taxon_string_to_category_id.keys())) == \
+        len(set(output_taxon_string_to_category_id.values()))
+
+    output_category_id_to_taxon_string = \
+        invert_dictionary(output_taxon_string_to_category_id)
+
+    with open(input_file,'r') as f:
+        output_data = json.load(f)
+
+    classification_descriptions = None
+    if 'classification_category_descriptions' in output_data:
+        classification_descriptions = output_data['classification_category_descriptions']
+
+    for im in tqdm(output_data['images']):
+
+        if 'detections' not in im or im['detections'] is None:
+            continue
+
+        # Possibly prepare a pre-filtering description
+        pre_filtering_description = None
+        if classification_descriptions is not None and add_pre_filtering_description:
+            category_to_count = count_detections_by_classification_category(im['detections'])
+            pre_filtering_description = \
+                get_classification_description_string(category_to_count,classification_descriptions)
+            im['pre_filtering_description'] = pre_filtering_description
+
+        for det in im['detections']:
+            if 'classifications' in det:
+                for classification in det['classifications']:
+                    classification[0] = \
+                        input_category_id_to_output_category_id[classification[0]]
+
+    # ...for each image
+
+    output_data['classification_categories'] = output_category_id_to_common_name
+    output_data['classification_category_descriptions'] = \
+        output_category_id_to_taxon_string
+
+
+    ##%% Write output
+
+    with open(output_file,'w') as f:
+        json.dump(output_data,f,indent=1)
+
+# ...def restrict_to_taxa_list(...)
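A hedged usage sketch for the new restrict_to_taxa_list entry point; the file names are placeholders, and the taxa-list format follows the docstring above (one latin name per line, optionally followed by a comma and a common name, with '#' lines treated as comments):

    from megadetector.postprocessing.classification_postprocessing import \
        restrict_to_taxa_list

    # Example contents of my_taxa.txt:
    #
    # # deer and coyote only
    # odocoileus virginianus,white-tailed deer
    # canis latrans,coyote

    restrict_to_taxa_list(taxa_list='my_taxa.txt',
                          speciesnet_taxonomy_file='speciesnet_taxonomy.txt',
                          input_file='predictions.json',
                          output_file='predictions_restricted.json',
                          allow_walk_down=False)

    # Passing input_file=None just validates the taxa list against the taxonomy
    restrict_to_taxa_list('my_taxa.txt', 'speciesnet_taxonomy.txt', None, None)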
--- a/megadetector/postprocessing/create_crop_folder.py
+++ b/megadetector/postprocessing/create_crop_folder.py
@@ -130,7 +130,7 @@ def crop_results_to_image_results(image_results_file_with_crop_ids,
             containing crop IDs, likely generated via create_crop_folder. All
             non-standard fields in this file will be passed along to [output_file].
         crop_results_file (str): results file for the crop folder
-        output_file (str):
+        output_file (str): output .json file, containing crop-level classifications
             mapped back to the image level.
         delete_crop_information (bool, optional): whether to delete the "crop_id" and
             "crop_filename_relative" fields from each detection, if present.