megadetector-5.0.27-py3-none-any.whl → megadetector-5.0.29-py3-none-any.whl
This diff shows the content of two publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
- megadetector/api/batch_processing/api_core/batch_service/score.py +4 -5
- megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +1 -1
- megadetector/api/batch_processing/api_support/summarize_daily_activity.py +1 -1
- megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
- megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
- megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
- megadetector/api/synchronous/api_core/tests/load_test.py +2 -3
- megadetector/classification/aggregate_classifier_probs.py +3 -3
- megadetector/classification/analyze_failed_images.py +5 -5
- megadetector/classification/cache_batchapi_outputs.py +5 -5
- megadetector/classification/create_classification_dataset.py +11 -12
- megadetector/classification/crop_detections.py +10 -10
- megadetector/classification/csv_to_json.py +8 -8
- megadetector/classification/detect_and_crop.py +13 -15
- megadetector/classification/evaluate_model.py +7 -7
- megadetector/classification/identify_mislabeled_candidates.py +6 -6
- megadetector/classification/json_to_azcopy_list.py +1 -1
- megadetector/classification/json_validator.py +29 -32
- megadetector/classification/map_classification_categories.py +9 -9
- megadetector/classification/merge_classification_detection_output.py +12 -9
- megadetector/classification/prepare_classification_script.py +19 -19
- megadetector/classification/prepare_classification_script_mc.py +23 -23
- megadetector/classification/run_classifier.py +4 -4
- megadetector/classification/save_mislabeled.py +6 -6
- megadetector/classification/train_classifier.py +1 -1
- megadetector/classification/train_classifier_tf.py +9 -9
- megadetector/classification/train_utils.py +10 -10
- megadetector/data_management/annotations/annotation_constants.py +1 -1
- megadetector/data_management/camtrap_dp_to_coco.py +45 -45
- megadetector/data_management/cct_json_utils.py +101 -101
- megadetector/data_management/cct_to_md.py +49 -49
- megadetector/data_management/cct_to_wi.py +33 -33
- megadetector/data_management/coco_to_labelme.py +75 -75
- megadetector/data_management/coco_to_yolo.py +189 -189
- megadetector/data_management/databases/add_width_and_height_to_db.py +3 -2
- megadetector/data_management/databases/combine_coco_camera_traps_files.py +38 -38
- megadetector/data_management/databases/integrity_check_json_db.py +202 -188
- megadetector/data_management/databases/subset_json_db.py +33 -33
- megadetector/data_management/generate_crops_from_cct.py +38 -38
- megadetector/data_management/get_image_sizes.py +54 -49
- megadetector/data_management/labelme_to_coco.py +130 -124
- megadetector/data_management/labelme_to_yolo.py +78 -72
- megadetector/data_management/lila/create_lila_blank_set.py +81 -83
- megadetector/data_management/lila/create_lila_test_set.py +32 -31
- megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
- megadetector/data_management/lila/download_lila_subset.py +21 -24
- megadetector/data_management/lila/generate_lila_per_image_labels.py +91 -91
- megadetector/data_management/lila/get_lila_annotation_counts.py +30 -30
- megadetector/data_management/lila/get_lila_image_counts.py +22 -22
- megadetector/data_management/lila/lila_common.py +70 -70
- megadetector/data_management/lila/test_lila_metadata_urls.py +13 -14
- megadetector/data_management/mewc_to_md.py +339 -340
- megadetector/data_management/ocr_tools.py +258 -252
- megadetector/data_management/read_exif.py +232 -223
- megadetector/data_management/remap_coco_categories.py +26 -26
- megadetector/data_management/remove_exif.py +31 -20
- megadetector/data_management/rename_images.py +187 -187
- megadetector/data_management/resize_coco_dataset.py +41 -41
- megadetector/data_management/speciesnet_to_md.py +41 -41
- megadetector/data_management/wi_download_csv_to_coco.py +55 -55
- megadetector/data_management/yolo_output_to_md_output.py +117 -120
- megadetector/data_management/yolo_to_coco.py +195 -188
- megadetector/detection/change_detection.py +831 -0
- megadetector/detection/process_video.py +341 -338
- megadetector/detection/pytorch_detector.py +308 -266
- megadetector/detection/run_detector.py +186 -166
- megadetector/detection/run_detector_batch.py +366 -364
- megadetector/detection/run_inference_with_yolov5_val.py +328 -325
- megadetector/detection/run_tiled_inference.py +312 -253
- megadetector/detection/tf_detector.py +24 -24
- megadetector/detection/video_utils.py +291 -283
- megadetector/postprocessing/add_max_conf.py +15 -11
- megadetector/postprocessing/categorize_detections_by_size.py +44 -44
- megadetector/postprocessing/classification_postprocessing.py +808 -311
- megadetector/postprocessing/combine_batch_outputs.py +20 -21
- megadetector/postprocessing/compare_batch_results.py +528 -517
- megadetector/postprocessing/convert_output_format.py +97 -97
- megadetector/postprocessing/create_crop_folder.py +220 -147
- megadetector/postprocessing/detector_calibration.py +173 -168
- megadetector/postprocessing/generate_csv_report.py +508 -0
- megadetector/postprocessing/load_api_results.py +25 -22
- megadetector/postprocessing/md_to_coco.py +129 -98
- megadetector/postprocessing/md_to_labelme.py +89 -83
- megadetector/postprocessing/md_to_wi.py +40 -40
- megadetector/postprocessing/merge_detections.py +87 -114
- megadetector/postprocessing/postprocess_batch_results.py +319 -302
- megadetector/postprocessing/remap_detection_categories.py +36 -36
- megadetector/postprocessing/render_detection_confusion_matrix.py +205 -199
- megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
- megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +702 -677
- megadetector/postprocessing/separate_detections_into_folders.py +226 -211
- megadetector/postprocessing/subset_json_detector_output.py +265 -262
- megadetector/postprocessing/top_folders_to_bottom.py +45 -45
- megadetector/postprocessing/validate_batch_results.py +70 -70
- megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +15 -15
- megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +14 -14
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +66 -69
- megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
- megadetector/taxonomy_mapping/simple_image_download.py +8 -8
- megadetector/taxonomy_mapping/species_lookup.py +33 -33
- megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
- megadetector/taxonomy_mapping/taxonomy_graph.py +11 -11
- megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
- megadetector/utils/azure_utils.py +22 -22
- megadetector/utils/ct_utils.py +1019 -200
- megadetector/utils/directory_listing.py +21 -77
- megadetector/utils/gpu_test.py +22 -22
- megadetector/utils/md_tests.py +541 -518
- megadetector/utils/path_utils.py +1511 -406
- megadetector/utils/process_utils.py +41 -41
- megadetector/utils/sas_blob_utils.py +53 -49
- megadetector/utils/split_locations_into_train_val.py +73 -60
- megadetector/utils/string_utils.py +147 -26
- megadetector/utils/url_utils.py +463 -173
- megadetector/utils/wi_utils.py +2629 -2868
- megadetector/utils/write_html_image_list.py +137 -137
- megadetector/visualization/plot_utils.py +21 -21
- megadetector/visualization/render_images_with_thumbnails.py +37 -73
- megadetector/visualization/visualization_utils.py +424 -404
- megadetector/visualization/visualize_db.py +197 -190
- megadetector/visualization/visualize_detector_output.py +126 -98
- {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/METADATA +6 -3
- megadetector-5.0.29.dist-info/RECORD +163 -0
- {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/WHEEL +1 -1
- megadetector/data_management/importers/add_nacti_sizes.py +0 -52
- megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
- megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
- megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
- megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
- megadetector/data_management/importers/awc_to_json.py +0 -191
- megadetector/data_management/importers/bellevue_to_json.py +0 -272
- megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
- megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
- megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
- megadetector/data_management/importers/cct_field_adjustments.py +0 -58
- megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
- megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
- megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
- megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
- megadetector/data_management/importers/ena24_to_json.py +0 -276
- megadetector/data_management/importers/filenames_to_json.py +0 -386
- megadetector/data_management/importers/helena_to_cct.py +0 -283
- megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
- megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
- megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
- megadetector/data_management/importers/jb_csv_to_json.py +0 -150
- megadetector/data_management/importers/mcgill_to_json.py +0 -250
- megadetector/data_management/importers/missouri_to_json.py +0 -490
- megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
- megadetector/data_management/importers/noaa_seals_2019.py +0 -181
- megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
- megadetector/data_management/importers/pc_to_json.py +0 -365
- megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
- megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
- megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
- megadetector/data_management/importers/rspb_to_json.py +0 -356
- megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
- megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
- megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
- megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
- megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
- megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
- megadetector/data_management/importers/sulross_get_exif.py +0 -65
- megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
- megadetector/data_management/importers/ubc_to_json.py +0 -399
- megadetector/data_management/importers/umn_to_json.py +0 -507
- megadetector/data_management/importers/wellington_to_json.py +0 -263
- megadetector/data_management/importers/wi_to_json.py +0 -442
- megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
- megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
- megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
- megadetector-5.0.27.dist-info/RECORD +0 -208
- {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/licenses/LICENSE +0 -0
- {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/top_level.txt +0 -0
|
@@ -3,16 +3,17 @@
|
|
|
3
3
|
classification_postprocessing.py
|
|
4
4
|
|
|
5
5
|
Functions for postprocessing species classification results, particularly:
|
|
6
|
-
|
|
6
|
+
|
|
7
7
|
* Smoothing results within an image (an image with 700 cows and one deer is really just 701
|
|
8
8
|
cows)
|
|
9
9
|
* Smoothing results within a sequence (a sequence that looks like deer/deer/deer/elk/deer/deer
|
|
10
10
|
is really just a deer)
|
|
11
|
-
|
|
11
|
+
|
|
12
12
|
"""
|
|
13
13
|
|
|
14
14
|
#%% Constants and imports
|
|
15
15
|
|
|
16
|
+
import os
|
|
16
17
|
import json
|
|
17
18
|
import copy
|
|
18
19
|
|
|
@@ -20,10 +21,18 @@ from collections import defaultdict
|
|
|
20
21
|
from tqdm import tqdm
|
|
21
22
|
|
|
22
23
|
from megadetector.utils.ct_utils import is_list_sorted
|
|
24
|
+
from megadetector.utils.ct_utils import sort_dictionary_by_value
|
|
25
|
+
from megadetector.utils.ct_utils import sort_dictionary_by_key
|
|
26
|
+
from megadetector.utils.ct_utils import invert_dictionary
|
|
27
|
+
|
|
23
28
|
from megadetector.utils.wi_utils import clean_taxonomy_string
|
|
24
29
|
from megadetector.utils.wi_utils import taxonomy_level_index
|
|
25
30
|
from megadetector.utils.wi_utils import taxonomy_level_string_to_index
|
|
26
|
-
|
|
31
|
+
|
|
32
|
+
from megadetector.utils.wi_utils import non_taxonomic_prediction_strings
|
|
33
|
+
from megadetector.utils.wi_utils import human_prediction_string
|
|
34
|
+
from megadetector.utils.wi_utils import animal_prediction_string
|
|
35
|
+
from megadetector.utils.wi_utils import blank_prediction_string # noqa
|
|
27
36
|
|
|
28
37
|
|
|
29
38
|
#%% Options classes
|
|
@@ -35,83 +44,83 @@ class ClassificationSmoothingOptions:
|
|
|
35
44
|
"""
|
|
36
45
|
|
|
37
46
|
def __init__(self):
|
|
38
|
-
|
|
39
|
-
#: How many detections do we need in a dominant category to overwrite
|
|
40
|
-
#: non-dominant classifications? This is irrelevant if
|
|
47
|
+
|
|
48
|
+
#: How many detections do we need in a dominant category to overwrite
|
|
49
|
+
#: non-dominant classifications? This is irrelevant if
|
|
41
50
|
#: max_detections_nondominant_class <= 1.
|
|
42
51
|
self.min_detections_to_overwrite_secondary = 4
|
|
43
|
-
|
|
44
|
-
#: Even if we have a dominant class, if a non-dominant class has at least
|
|
52
|
+
|
|
53
|
+
#: Even if we have a dominant class, if a non-dominant class has at least
|
|
45
54
|
#: this many classifications in an image, leave them alone.
|
|
46
55
|
#:
|
|
47
56
|
#: If this is <= 1, we won't replace non-dominant, non-other classes
|
|
48
57
|
#: with the dominant class, even if there are 900 cows and 1 deer.
|
|
49
58
|
self.max_detections_nondominant_class = 1
|
|
50
|
-
|
|
51
|
-
#: How many detections do we need in a dominant category to overwrite
|
|
52
|
-
#: non-dominant classifications in the same family? If this is <= 0,
|
|
53
|
-
#: we'll skip this step. This option doesn't mean anything if
|
|
59
|
+
|
|
60
|
+
#: How many detections do we need in a dominant category to overwrite
|
|
61
|
+
#: non-dominant classifications in the same family? If this is <= 0,
|
|
62
|
+
#: we'll skip this step. This option doesn't mean anything if
|
|
54
63
|
#: max_detections_nondominant_class_same_family <= 1.
|
|
55
64
|
self.min_detections_to_overwrite_secondary_same_family = 2
|
|
56
|
-
|
|
57
|
-
#: If we have this many classifications of a nondominant category,
|
|
65
|
+
|
|
66
|
+
#: If we have this many classifications of a nondominant category,
|
|
58
67
|
#: we won't do same-family overwrites. <= 1 means "even if there are
|
|
59
68
|
#: a million deer, if there are two million moose, call all the deer
|
|
60
|
-
#: moose". This option doesn't mean anything if
|
|
69
|
+
#: moose". This option doesn't mean anything if
|
|
61
70
|
#: min_detections_to_overwrite_secondary_same_family <= 0.
|
|
62
71
|
self.max_detections_nondominant_class_same_family = -1
|
|
63
|
-
|
|
64
|
-
#: If the dominant class has at least this many classifications, overwrite
|
|
72
|
+
|
|
73
|
+
#: If the dominant class has at least this many classifications, overwrite
|
|
65
74
|
#: "other" classifications with the dominant class
|
|
66
75
|
self.min_detections_to_overwrite_other = 2
|
|
67
|
-
|
|
76
|
+
|
|
68
77
|
#: Names to treat as "other" categories; can't be None, but can be empty
|
|
69
78
|
#:
|
|
70
79
|
#: "Other" classifications will be changed to the dominant category, regardless
|
|
71
|
-
#: of confidence, as long as there are at least min_detections_to_overwrite_other
|
|
80
|
+
#: of confidence, as long as there are at least min_detections_to_overwrite_other
|
|
72
81
|
#: examples of the dominant class. For example, cow/other will remain unchanged,
|
|
73
82
|
#: but cow/cow/other will become cow/cow/cow.
|
|
74
83
|
self.other_category_names = ['other','unknown','no cv result','animal','blank','mammal']
|
|
75
|
-
|
|
84
|
+
|
|
76
85
|
#: We're not even going to mess around with classifications below this threshold.
|
|
77
86
|
#:
|
|
78
87
|
#: We won't count them, we won't over-write them, they don't exist during the
|
|
79
88
|
#: within-image smoothing step.
|
|
80
89
|
self.classification_confidence_threshold = 0.5
|
|
81
|
-
|
|
90
|
+
|
|
82
91
|
#: We're not even going to mess around with detections below this threshold.
|
|
83
92
|
#:
|
|
84
93
|
#: We won't count them, we won't over-write them, they don't exist during the
|
|
85
94
|
#: within-image smoothing step.
|
|
86
95
|
self.detection_confidence_threshold = 0.15
|
|
87
|
-
|
|
96
|
+
|
|
88
97
|
#: If classification descriptions are present and appear to represent taxonomic
|
|
89
|
-
#: information, should we propagate classifications when lower-level taxa are more
|
|
90
|
-
#: common in an image? For example, if we see "carnivore/fox/fox/deer", should
|
|
98
|
+
#: information, should we propagate classifications when lower-level taxa are more
|
|
99
|
+
#: common in an image? For example, if we see "carnivore/fox/fox/deer", should
|
|
91
100
|
#: we make that "fox/fox/fox/deer"?
|
|
92
101
|
self.propagate_classifications_through_taxonomy = True
|
|
93
|
-
|
|
94
|
-
#: When propagating classifications down through taxonomy levels, we have to
|
|
102
|
+
|
|
103
|
+
#: When propagating classifications down through taxonomy levels, we have to
|
|
95
104
|
#: decide whether we prefer more frequent categories or more specific categories.
|
|
96
105
|
#: taxonomy_propagation_level_weight and taxonomy_propagation_count_weight
|
|
97
106
|
#: balance levels against counts in this process.
|
|
98
107
|
self.taxonomy_propagation_level_weight = 1.0
|
|
99
|
-
|
|
100
|
-
#: When propagating classifications down through taxonomy levels, we have to
|
|
108
|
+
|
|
109
|
+
#: When propagating classifications down through taxonomy levels, we have to
|
|
101
110
|
#: decide whether we prefer more frequent categories or more specific categories.
|
|
102
111
|
#: taxonomy_propagation_level_weight and taxonomy_propagation_count_weight
|
|
103
112
|
#: balance levels against counts in this process.
|
|
104
113
|
#:
|
|
105
114
|
#: With a very low default value, this just breaks ties.
|
|
106
115
|
self.taxonomy_propagation_count_weight = 0.01
|
|
107
|
-
|
|
116
|
+
|
|
108
117
|
#: Should we record information about the state of labels prior to smoothing?
|
|
109
118
|
self.add_pre_smoothing_description = True
|
|
110
|
-
|
|
119
|
+
|
|
111
120
|
#: When a dict (rather than a file) is passed to either smoothing function,
|
|
112
121
|
#: if this is True, we'll make a copy of the input dict before modifying.
|
|
113
122
|
self.modify_in_place = False
|
|
114
|
-
|
|
123
|
+
|
|
115
124
|
#: Debug options
|
|
116
125
|
self.break_at_image = None
|
|
117
126
|
|
|
@@ -122,56 +131,75 @@ def _results_for_sequence(images_this_sequence,filename_to_results):
|
|
|
122
131
|
"""
|
|
123
132
|
Fetch MD results for every image in this sequence, based on the 'file_name' field
|
|
124
133
|
"""
|
|
125
|
-
|
|
134
|
+
|
|
126
135
|
results_this_sequence = []
|
|
127
136
|
for im in images_this_sequence:
|
|
128
137
|
fn = im['file_name']
|
|
129
138
|
results_this_image = filename_to_results[fn]
|
|
130
139
|
assert isinstance(results_this_image,dict)
|
|
131
140
|
results_this_sequence.append(results_this_image)
|
|
132
|
-
|
|
141
|
+
|
|
133
142
|
return results_this_sequence
|
|
134
|
-
|
|
135
|
-
|
|
143
|
+
|
|
144
|
+
|
|
136
145
|
def _sort_images_by_time(images):
|
|
137
146
|
"""
|
|
138
147
|
Returns a copy of [images], sorted by the 'datetime' field (ascending).
|
|
139
148
|
"""
|
|
140
|
-
return sorted(images, key = lambda im: im['datetime'])
|
|
149
|
+
return sorted(images, key = lambda im: im['datetime'])
|
|
141
150
|
|
|
142
151
|
|
|
143
|
-
def
|
|
152
|
+
def count_detections_by_classification_category(detections,options=None):
|
|
144
153
|
"""
|
|
145
|
-
Count the number of instances of each category in the detections list
|
|
146
|
-
[detections] that have an above-threshold detection. Sort results in descending
|
|
154
|
+
Count the number of instances of each classification category in the detections list
|
|
155
|
+
[detections] that have an above-threshold detection. Sort results in descending
|
|
147
156
|
order by count. Returns a dict mapping category ID --> count. If no detections
|
|
148
157
|
are above threshold, returns an empty dict.
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
158
|
+
|
|
159
|
+
Only processes the top classification for each detection.
|
|
160
|
+
|
|
161
|
+
Args:
|
|
162
|
+
detections: detections list
|
|
163
|
+
options (ClassificationSmoothingOptions, optional): see ClassificationSmoothingOptions
|
|
164
|
+
|
|
165
|
+
Returns:
|
|
166
|
+
dict mapping above-threshold category IDs to counts
|
|
152
167
|
"""
|
|
153
|
-
|
|
168
|
+
|
|
169
|
+
if detections is None or len(detections) == 0:
|
|
170
|
+
return {}
|
|
171
|
+
|
|
172
|
+
if options is None:
|
|
173
|
+
options = ClassificationSmoothingOptions()
|
|
174
|
+
|
|
154
175
|
category_to_count = defaultdict(int)
|
|
155
|
-
|
|
176
|
+
|
|
156
177
|
for det in detections:
|
|
157
178
|
if ('classifications' in det) and (det['conf'] >= options.detection_confidence_threshold):
|
|
158
|
-
assert len(det['classifications']) == 1
|
|
179
|
+
# assert len(det['classifications']) == 1
|
|
159
180
|
c = det['classifications'][0]
|
|
160
181
|
if c[1] >= options.classification_confidence_threshold:
|
|
161
|
-
category_to_count[c[0]] += 1
|
|
162
|
-
|
|
182
|
+
category_to_count[c[0]] += 1
|
|
183
|
+
|
|
163
184
|
category_to_count = {k: v for k, v in sorted(category_to_count.items(),
|
|
164
|
-
key=lambda item: item[1],
|
|
185
|
+
key=lambda item: item[1],
|
|
165
186
|
reverse=True)}
|
|
166
|
-
|
|
187
|
+
|
|
167
188
|
return category_to_count
|
|
168
189
|
|
|
169
190
|
|
|
170
|
-
def
|
|
191
|
+
def get_classification_description_string(category_to_count,classification_descriptions):
|
|
171
192
|
"""
|
|
172
193
|
Return a string summarizing the image content according to [category_to_count].
|
|
194
|
+
|
|
195
|
+
Args:
|
|
196
|
+
category_to_count (dict): a dict mapping category IDs to counts
|
|
197
|
+
classification_descriptions (dict): a dict mapping category IDs to description strings
|
|
198
|
+
|
|
199
|
+
Returns:
|
|
200
|
+
string: a description of this image's content, e.g. "rabbit (4), human (1)"
|
|
173
201
|
"""
|
|
174
|
-
|
|
202
|
+
|
|
175
203
|
category_strings = []
|
|
176
204
|
# category_id = next(iter(category_to_count))
|
|
177
205
|
for category_id in category_to_count:
|
|
@@ -184,29 +212,29 @@ def _get_description_string(category_to_count,classification_descriptions):
|
|
|
184
212
|
count = category_to_count[category_id]
|
|
185
213
|
category_string = '{} ({})'.format(category_name,count)
|
|
186
214
|
category_strings.append(category_string)
|
|
187
|
-
|
|
215
|
+
|
|
188
216
|
return ', '.join(category_strings)
|
|
189
|
-
|
|
217
|
+
|
|
190
218
|
|
|
191
219
|
def _print_counts_with_names(category_to_count,classification_descriptions):
|
|
192
220
|
"""
|
|
193
221
|
Print a list of classification categories with counts, based in the name --> count
|
|
194
222
|
dict [category_to_count]
|
|
195
223
|
"""
|
|
196
|
-
|
|
224
|
+
|
|
197
225
|
for category_id in category_to_count:
|
|
198
226
|
category_name = classification_descriptions[category_id]
|
|
199
227
|
count = category_to_count[category_id]
|
|
200
228
|
print('{}: {} ({})'.format(category_id,category_name,count))
|
|
201
|
-
|
|
202
|
-
|
|
229
|
+
|
|
230
|
+
|
|
203
231
|
def _prepare_results_for_smoothing(input_file,options):
|
|
204
232
|
"""
|
|
205
|
-
Load results from [input_file] if necessary, prepare category
|
|
233
|
+
Load results from [input_file] if necessary, prepare category descriptions
|
|
206
234
|
for smoothing. Adds pre-smoothing descriptions to every image if the options
|
|
207
235
|
say we're supposed to do that.
|
|
208
236
|
"""
|
|
209
|
-
|
|
237
|
+
|
|
210
238
|
if isinstance(input_file,str):
|
|
211
239
|
with open(input_file,'r') as f:
|
|
212
240
|
print('Loading results from:\n{}'.format(input_file))
|
|
@@ -221,71 +249,71 @@ def _prepare_results_for_smoothing(input_file,options):
|
|
|
221
249
|
|
|
222
250
|
|
|
223
251
|
## Category processing
|
|
224
|
-
|
|
252
|
+
|
|
225
253
|
category_name_to_id = {d['classification_categories'][k]:k for k in d['classification_categories']}
|
|
226
254
|
other_category_ids = []
|
|
227
255
|
for s in options.other_category_names:
|
|
228
256
|
if s in category_name_to_id:
|
|
229
257
|
other_category_ids.append(category_name_to_id[s])
|
|
230
|
-
|
|
258
|
+
|
|
231
259
|
# Before we do anything else, get rid of everything but the top classification
|
|
232
260
|
# for each detection, and remove the 'classifications' field from detections with
|
|
233
261
|
# no classifications.
|
|
234
262
|
for im in tqdm(d['images']):
|
|
235
|
-
|
|
263
|
+
|
|
236
264
|
if 'detections' not in im or im['detections'] is None or len(im['detections']) == 0:
|
|
237
265
|
continue
|
|
238
|
-
|
|
266
|
+
|
|
239
267
|
detections = im['detections']
|
|
240
|
-
|
|
268
|
+
|
|
241
269
|
for det in detections:
|
|
242
|
-
|
|
270
|
+
|
|
243
271
|
if 'classifications' not in det:
|
|
244
272
|
continue
|
|
245
273
|
if len(det['classifications']) == 0:
|
|
246
274
|
del det['classifications']
|
|
247
275
|
continue
|
|
248
|
-
|
|
276
|
+
|
|
249
277
|
classification_confidence_values = [c[1] for c in det['classifications']]
|
|
250
278
|
assert is_list_sorted(classification_confidence_values,reverse=True)
|
|
251
279
|
det['classifications'] = [det['classifications'][0]]
|
|
252
|
-
|
|
280
|
+
|
|
253
281
|
# ...for each detection in this image
|
|
254
|
-
|
|
282
|
+
|
|
255
283
|
# ...for each image
|
|
256
|
-
|
|
257
|
-
|
|
284
|
+
|
|
285
|
+
|
|
258
286
|
## Clean up classification descriptions so we can test taxonomic relationships
|
|
259
287
|
## by substring testing.
|
|
260
|
-
|
|
288
|
+
|
|
261
289
|
classification_descriptions_clean = None
|
|
262
290
|
classification_descriptions = None
|
|
263
|
-
|
|
291
|
+
|
|
264
292
|
if 'classification_category_descriptions' in d:
|
|
265
293
|
classification_descriptions = d['classification_category_descriptions']
|
|
266
294
|
classification_descriptions_clean = {}
|
|
267
295
|
# category_id = next(iter(classification_descriptions))
|
|
268
|
-
for category_id in classification_descriptions:
|
|
296
|
+
for category_id in classification_descriptions:
|
|
269
297
|
classification_descriptions_clean[category_id] = \
|
|
270
298
|
clean_taxonomy_string(classification_descriptions[category_id]).strip(';').lower()
|
|
271
|
-
|
|
272
|
-
|
|
299
|
+
|
|
300
|
+
|
|
273
301
|
## Optionally add pre-smoothing descriptions to every image
|
|
274
|
-
|
|
275
|
-
if options.add_pre_smoothing_description:
|
|
276
|
-
|
|
302
|
+
|
|
303
|
+
if options.add_pre_smoothing_description and (classification_descriptions is not None):
|
|
304
|
+
|
|
277
305
|
for im in tqdm(d['images']):
|
|
278
|
-
|
|
306
|
+
|
|
279
307
|
if 'detections' not in im or im['detections'] is None or len(im['detections']) == 0:
|
|
280
308
|
continue
|
|
281
|
-
|
|
282
|
-
detections = im['detections']
|
|
283
|
-
category_to_count =
|
|
284
|
-
|
|
309
|
+
|
|
310
|
+
detections = im['detections']
|
|
311
|
+
category_to_count = count_detections_by_classification_category(detections, options)
|
|
312
|
+
|
|
285
313
|
im['pre_smoothing_description'] = \
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
314
|
+
get_classification_description_string(category_to_count, classification_descriptions)
|
|
315
|
+
|
|
316
|
+
|
|
289
317
|
return {
|
|
290
318
|
'd':d,
|
|
291
319
|
'other_category_ids':other_category_ids,
|
|
@@ -293,7 +321,7 @@ def _prepare_results_for_smoothing(input_file,options):
|
|
|
293
321
|
'classification_descriptions':classification_descriptions
|
|
294
322
|
}
|
|
295
323
|
|
|
296
|
-
# ...def _prepare_results_for_smoothing(...)
|
|
324
|
+
# ...def _prepare_results_for_smoothing(...)
|
|
297
325
|
|
|
298
326
|
|
|
299
327
|
def _smooth_classifications_for_list_of_detections(detections,
|
|
@@ -304,282 +332,323 @@ def _smooth_classifications_for_list_of_detections(detections,
|
|
|
304
332
|
"""
|
|
305
333
|
Smooth classifications for a list of detections, which may have come from a single
|
|
306
334
|
image, or may represent an entire sequence.
|
|
307
|
-
|
|
335
|
+
|
|
308
336
|
Returns None if no changes are made, else a dict.
|
|
309
|
-
|
|
310
|
-
classification_descriptions_clean should be semicolon-delimited taxonomic strings
|
|
337
|
+
|
|
338
|
+
classification_descriptions_clean should be semicolon-delimited taxonomic strings
|
|
311
339
|
from which common names and GUIDs have already been removed.
|
|
312
|
-
|
|
340
|
+
|
|
313
341
|
Assumes there is only one classification per detection, i.e. that non-top classifications
|
|
314
|
-
have already been remoevd.
|
|
342
|
+
have already been remoevd.
|
|
315
343
|
"""
|
|
316
|
-
|
|
344
|
+
|
|
317
345
|
## Count the number of instances of each category in this image
|
|
318
|
-
|
|
319
|
-
category_to_count =
|
|
346
|
+
|
|
347
|
+
category_to_count = count_detections_by_classification_category(detections, options)
|
|
320
348
|
# _print_counts_with_names(category_to_count,classification_descriptions)
|
|
321
|
-
#
|
|
322
|
-
|
|
349
|
+
# get_classification_description_string(category_to_count, classification_descriptions)
|
|
350
|
+
|
|
323
351
|
if len(category_to_count) <= 1:
|
|
324
352
|
return None
|
|
325
|
-
|
|
353
|
+
|
|
326
354
|
keys = list(category_to_count.keys())
|
|
327
|
-
|
|
328
|
-
# Handle a quirky special case: if the most common category is "other" and
|
|
355
|
+
|
|
356
|
+
# Handle a quirky special case: if the most common category is "other" and
|
|
329
357
|
# it's "tied" with the second-most-common category, swap them
|
|
330
358
|
if (len(keys) > 1) and \
|
|
331
359
|
(keys[0] in other_category_ids) and \
|
|
332
360
|
(keys[1] not in other_category_ids) and \
|
|
333
361
|
(category_to_count[keys[0]] == category_to_count[keys[1]]):
|
|
334
362
|
keys[1], keys[0] = keys[0], keys[1]
|
|
335
|
-
|
|
336
|
-
max_count = category_to_count[keys[0]]
|
|
363
|
+
|
|
364
|
+
max_count = category_to_count[keys[0]]
|
|
337
365
|
most_common_category = keys[0]
|
|
338
366
|
del keys
|
|
339
|
-
|
|
340
|
-
|
|
367
|
+
|
|
368
|
+
|
|
341
369
|
## Debug tools
|
|
342
|
-
|
|
370
|
+
|
|
343
371
|
verbose_debug_enabled = False
|
|
344
|
-
|
|
372
|
+
|
|
345
373
|
if options.break_at_image is not None:
|
|
346
374
|
for det in detections:
|
|
347
375
|
if 'image_filename' in det and \
|
|
348
376
|
det['image_filename'] == options.break_at_image:
|
|
349
377
|
verbose_debug_enabled = True
|
|
350
378
|
break
|
|
351
|
-
|
|
379
|
+
|
|
352
380
|
if verbose_debug_enabled:
|
|
353
381
|
_print_counts_with_names(category_to_count,classification_descriptions)
|
|
354
|
-
import
|
|
355
|
-
|
|
356
|
-
|
|
382
|
+
from IPython import embed; embed()
|
|
383
|
+
|
|
384
|
+
|
|
357
385
|
## Possibly change "other" classifications to the most common category
|
|
358
|
-
|
|
386
|
+
|
|
359
387
|
# ...if the dominant category is not an "other" category.
|
|
360
|
-
|
|
388
|
+
|
|
361
389
|
n_other_classifications_changed_this_image = 0
|
|
362
|
-
|
|
390
|
+
|
|
363
391
|
# If we have at least *min_detections_to_overwrite_other* in a category that isn't
|
|
364
392
|
# "other", change all "other" classifications to that category
|
|
365
393
|
if (max_count >= options.min_detections_to_overwrite_other) and \
|
|
366
394
|
(most_common_category not in other_category_ids):
|
|
367
|
-
|
|
395
|
+
|
|
368
396
|
for det in detections:
|
|
369
|
-
|
|
397
|
+
|
|
370
398
|
if ('classifications' not in det) or \
|
|
371
399
|
(det['conf'] < options.detection_confidence_threshold):
|
|
372
400
|
continue
|
|
373
|
-
|
|
401
|
+
|
|
374
402
|
assert len(det['classifications']) == 1
|
|
375
403
|
c = det['classifications'][0]
|
|
376
|
-
|
|
404
|
+
|
|
377
405
|
if (c[1] >= options.classification_confidence_threshold) and \
|
|
378
406
|
(c[0] in other_category_ids):
|
|
379
|
-
|
|
407
|
+
|
|
408
|
+
if verbose_debug_enabled:
|
|
409
|
+
print('Replacing {} with {}'.format(
|
|
410
|
+
classification_descriptions[c[0]],
|
|
411
|
+
classification_descriptions[c[1]]))
|
|
412
|
+
|
|
380
413
|
n_other_classifications_changed_this_image += 1
|
|
381
414
|
c[0] = most_common_category
|
|
382
|
-
|
|
415
|
+
|
|
383
416
|
# ...if there are classifications for this detection
|
|
384
|
-
|
|
417
|
+
|
|
385
418
|
# ...for each detection
|
|
386
|
-
|
|
419
|
+
|
|
387
420
|
# ...if we should overwrite all "other" classifications
|
|
388
|
-
|
|
389
|
-
|
|
421
|
+
|
|
422
|
+
if verbose_debug_enabled:
|
|
423
|
+
print('Made {} other changes'.format(n_other_classifications_changed_this_image))
|
|
424
|
+
|
|
425
|
+
|
|
390
426
|
## Re-count
|
|
391
|
-
|
|
392
|
-
category_to_count =
|
|
393
|
-
# _print_counts_with_names(category_to_count,classification_descriptions)
|
|
427
|
+
|
|
428
|
+
category_to_count = count_detections_by_classification_category(detections, options)
|
|
429
|
+
# _print_counts_with_names(category_to_count,classification_descriptions)
|
|
394
430
|
keys = list(category_to_count.keys())
|
|
395
|
-
max_count = category_to_count[keys[0]]
|
|
431
|
+
max_count = category_to_count[keys[0]]
|
|
396
432
|
most_common_category = keys[0]
|
|
397
433
|
del keys
|
|
398
|
-
|
|
399
|
-
|
|
434
|
+
|
|
435
|
+
|
|
400
436
|
## Possibly change some non-dominant classifications to the dominant category
|
|
401
|
-
|
|
437
|
+
|
|
438
|
+
process_taxonomic_rules = \
|
|
439
|
+
(classification_descriptions_clean is not None) and \
|
|
440
|
+
(len(classification_descriptions_clean) > 0) and \
|
|
441
|
+
(len(category_to_count) > 1)
|
|
442
|
+
|
|
402
443
|
n_detections_flipped_this_image = 0
|
|
403
|
-
|
|
404
|
-
# Don't do this if the most common category is an "other" category, or
|
|
444
|
+
|
|
445
|
+
# Don't do this if the most common category is an "other" category, or
|
|
405
446
|
# if we don't have enough of the most common category
|
|
406
447
|
if (most_common_category not in other_category_ids) and \
|
|
407
448
|
(max_count >= options.min_detections_to_overwrite_secondary):
|
|
408
|
-
|
|
449
|
+
|
|
409
450
|
# i_det = 0; det = detections[i_det]
|
|
410
451
|
for i_det,det in enumerate(detections):
|
|
411
|
-
|
|
452
|
+
|
|
412
453
|
if ('classifications' not in det) or \
|
|
413
454
|
(det['conf'] < options.detection_confidence_threshold):
|
|
414
455
|
continue
|
|
415
|
-
|
|
456
|
+
|
|
416
457
|
assert len(det['classifications']) == 1
|
|
417
458
|
c = det['classifications'][0]
|
|
418
|
-
|
|
459
|
+
|
|
419
460
|
# Don't over-write the most common category with itself
|
|
420
461
|
if c[0] == most_common_category:
|
|
421
462
|
continue
|
|
422
|
-
|
|
463
|
+
|
|
423
464
|
# Don't bother with below-threshold classifications
|
|
424
465
|
if c[1] < options.classification_confidence_threshold:
|
|
425
466
|
continue
|
|
426
|
-
|
|
467
|
+
|
|
468
|
+
# If we're doing taxonomic processing, at this stage, don't turn children
|
|
469
|
+
# into parents; we'll likely turn parents into children in the next stage.
|
|
470
|
+
|
|
471
|
+
if process_taxonomic_rules:
|
|
472
|
+
|
|
473
|
+
most_common_category_description = \
|
|
474
|
+
classification_descriptions_clean[most_common_category]
|
|
475
|
+
|
|
476
|
+
category_id_this_classification = c[0]
|
|
477
|
+
assert category_id_this_classification in category_to_count
|
|
478
|
+
|
|
479
|
+
category_description_this_classification = \
|
|
480
|
+
classification_descriptions_clean[category_id_this_classification]
|
|
481
|
+
|
|
482
|
+
# An empty description corresponds to the "animal" category. We don't handle
|
|
483
|
+
# "animal" here as a parent category, that would be handled in the "other smoothing"
|
|
484
|
+
# step above.
|
|
485
|
+
if len(category_description_this_classification) == 0:
|
|
486
|
+
continue
|
|
487
|
+
|
|
488
|
+
most_common_category_is_parent_of_this_category = \
|
|
489
|
+
most_common_category_description in category_description_this_classification
|
|
490
|
+
|
|
491
|
+
if most_common_category_is_parent_of_this_category:
|
|
492
|
+
continue
|
|
493
|
+
|
|
427
494
|
# If we have fewer of this category than the most common category,
|
|
428
495
|
# but not *too* many, flip it to the most common category.
|
|
429
496
|
if (max_count > category_to_count[c[0]]) and \
|
|
430
497
|
(category_to_count[c[0]] <= options.max_detections_nondominant_class):
|
|
431
|
-
|
|
498
|
+
|
|
432
499
|
c[0] = most_common_category
|
|
433
|
-
n_detections_flipped_this_image += 1
|
|
434
|
-
|
|
500
|
+
n_detections_flipped_this_image += 1
|
|
501
|
+
|
|
435
502
|
# ...for each detection
|
|
436
503
|
|
|
437
|
-
# ...if the dominant category is legit
|
|
438
|
-
|
|
439
|
-
|
|
504
|
+
# ...if the dominant category is legit
|
|
505
|
+
|
|
506
|
+
if verbose_debug_enabled:
|
|
507
|
+
print('Made {} non-dominant --> dominant changes'.format(
|
|
508
|
+
n_detections_flipped_this_image))
|
|
509
|
+
|
|
510
|
+
|
|
440
511
|
## Re-count
|
|
441
|
-
|
|
442
|
-
category_to_count =
|
|
443
|
-
# _print_counts_with_names(category_to_count,classification_descriptions)
|
|
512
|
+
|
|
513
|
+
category_to_count = count_detections_by_classification_category(detections, options)
|
|
514
|
+
# _print_counts_with_names(category_to_count,classification_descriptions)
|
|
444
515
|
keys = list(category_to_count.keys())
|
|
445
|
-
max_count = category_to_count[keys[0]]
|
|
516
|
+
max_count = category_to_count[keys[0]]
|
|
446
517
|
most_common_category = keys[0]
|
|
447
518
|
del keys
|
|
448
|
-
|
|
449
|
-
|
|
519
|
+
|
|
520
|
+
|
|
450
521
|
## Possibly collapse higher-level taxonomic predictions down to lower levels
|
|
451
|
-
|
|
452
|
-
# ...when the most common class is a child of a less common class.
|
|
453
|
-
|
|
522
|
+
|
|
454
523
|
n_taxonomic_changes_this_image = 0
|
|
455
|
-
|
|
524
|
+
|
|
456
525
|
process_taxonomic_rules = \
|
|
457
526
|
(classification_descriptions_clean is not None) and \
|
|
458
527
|
(len(classification_descriptions_clean) > 0) and \
|
|
459
528
|
(len(category_to_count) > 1)
|
|
460
|
-
|
|
529
|
+
|
|
461
530
|
if process_taxonomic_rules and options.propagate_classifications_through_taxonomy:
|
|
462
|
-
|
|
531
|
+
|
|
463
532
|
# det = detections[3]
|
|
464
533
|
for det in detections:
|
|
465
|
-
|
|
534
|
+
|
|
466
535
|
if ('classifications' not in det) or \
|
|
467
536
|
(det['conf'] < options.detection_confidence_threshold):
|
|
468
537
|
continue
|
|
469
|
-
|
|
538
|
+
|
|
470
539
|
assert len(det['classifications']) == 1
|
|
471
540
|
c = det['classifications'][0]
|
|
472
|
-
|
|
541
|
+
|
|
473
542
|
# Don't bother with any classifications below the confidence threshold
|
|
474
543
|
if c[1] < options.classification_confidence_threshold:
|
|
475
544
|
continue
|
|
476
545
|
|
|
477
546
|
category_id_this_classification = c[0]
|
|
478
547
|
assert category_id_this_classification in category_to_count
|
|
479
|
-
|
|
548
|
+
|
|
480
549
|
category_description_this_classification = \
|
|
481
550
|
classification_descriptions_clean[category_id_this_classification]
|
|
482
|
-
|
|
483
|
-
# An empty description corresponds to the "animal" category. We don't handle
|
|
484
|
-
# "animal" here as a parent category, that would be handled in the "other smoothing"
|
|
551
|
+
|
|
552
|
+
# An empty description corresponds to the "animal" category. We don't handle
|
|
553
|
+
# "animal" here as a parent category, that would be handled in the "other smoothing"
|
|
485
554
|
# step above.
|
|
486
555
|
if len(category_description_this_classification) == 0:
|
|
487
556
|
continue
|
|
488
|
-
|
|
557
|
+
|
|
489
558
|
# We may have multiple child categories to choose from; this keeps track of
|
|
490
559
|
# the "best" we've seen so far. "Best" is based on the level (species is better
|
|
491
560
|
# than genus) and number.
|
|
492
561
|
child_category_to_score = defaultdict(float)
|
|
493
|
-
|
|
562
|
+
|
|
494
563
|
for category_id_of_candidate_child in category_to_count.keys():
|
|
495
|
-
|
|
564
|
+
|
|
496
565
|
# A category is never its own child
|
|
497
566
|
if category_id_of_candidate_child == category_id_this_classification:
|
|
498
567
|
continue
|
|
499
|
-
|
|
568
|
+
|
|
500
569
|
# Is this candidate a child of the current classification?
|
|
501
570
|
category_description_candidate_child = \
|
|
502
571
|
classification_descriptions_clean[category_id_of_candidate_child]
|
|
503
|
-
|
|
572
|
+
|
|
504
573
|
# An empty description corresponds to "animal", which can never
|
|
505
574
|
# be a child of another category.
|
|
506
575
|
if len(category_description_candidate_child) == 0:
|
|
507
576
|
continue
|
|
508
|
-
|
|
509
|
-
# As long as we're using "clean" descriptions, parent/child taxonomic
|
|
577
|
+
|
|
578
|
+
# As long as we're using "clean" descriptions, parent/child taxonomic
|
|
510
579
|
# relationships are defined by a substring relationship
|
|
511
580
|
is_child = category_description_this_classification in \
|
|
512
581
|
category_description_candidate_child
|
|
513
582
|
if not is_child:
|
|
514
583
|
continue
|
|
515
|
-
|
|
584
|
+
|
|
516
585
|
# How many instances of this child category are there?
|
|
517
586
|
child_category_count = category_to_count[category_id_of_candidate_child]
|
|
518
|
-
|
|
587
|
+
|
|
519
588
|
# What taxonomy level is this child category defined at?
|
|
520
589
|
child_category_level = taxonomy_level_index(
|
|
521
590
|
classification_descriptions[category_id_of_candidate_child])
|
|
522
|
-
|
|
591
|
+
|
|
523
592
|
child_category_to_score[category_id_of_candidate_child] = \
|
|
524
593
|
child_category_level * options.taxonomy_propagation_level_weight + \
|
|
525
594
|
child_category_count * options.taxonomy_propagation_count_weight
|
|
526
|
-
|
|
595
|
+
|
|
527
596
|
# ...for each category we are considering reducing this classification to
|
|
528
|
-
|
|
597
|
+
|
|
529
598
|
# Did we find a category we want to change this classification to?
|
|
530
599
|
if len(child_category_to_score) > 0:
|
|
531
|
-
|
|
600
|
+
|
|
532
601
|
# Find the child category with the highest score
|
|
533
602
|
child_category_to_score = sort_dictionary_by_value(
|
|
534
603
|
child_category_to_score,reverse=True)
|
|
535
604
|
best_child_category = next(iter(child_category_to_score.keys()))
|
|
536
|
-
|
|
605
|
+
|
|
537
606
|
if verbose_debug_enabled:
|
|
538
607
|
old_category_name = \
|
|
539
608
|
classification_descriptions_clean[c[0]]
|
|
540
609
|
new_category_name = \
|
|
541
610
|
classification_descriptions_clean[best_child_category]
|
|
542
611
|
print('Replacing {} with {}'.format(
|
|
543
|
-
old_category_name,new_category_name))
|
|
544
|
-
|
|
612
|
+
old_category_name,new_category_name))
|
|
613
|
+
|
|
545
614
|
c[0] = best_child_category
|
|
546
|
-
n_taxonomic_changes_this_image += 1
|
|
547
|
-
|
|
615
|
+
n_taxonomic_changes_this_image += 1
|
|
616
|
+
|
|
548
617
|
# ...for each detection
|
|
549
|
-
|
|
550
|
-
# ...if we have taxonomic information available
|
|
551
|
-
|
|
552
|
-
|
|
618
|
+
|
|
619
|
+
# ...if we have taxonomic information available
|
|
620
|
+
|
|
621
|
+
|
|
553
622
|
## Re-count
|
|
554
|
-
|
|
555
|
-
category_to_count =
|
|
556
|
-
# _print_counts_with_names(category_to_count,classification_descriptions)
|
|
623
|
+
|
|
624
|
+
category_to_count = count_detections_by_classification_category(detections, options)
|
|
625
|
+
# _print_counts_with_names(category_to_count,classification_descriptions)
|
|
557
626
|
keys = list(category_to_count.keys())
|
|
558
|
-
max_count = category_to_count[keys[0]]
|
|
627
|
+
max_count = category_to_count[keys[0]]
|
|
559
628
|
most_common_category = keys[0]
|
|
560
629
|
del keys
|
|
561
|
-
|
|
562
|
-
|
|
630
|
+
|
|
631
|
+
|
|
563
632
|
## Possibly do within-family smoothing
|
|
564
|
-
|
|
633
|
+
|
|
565
634
|
n_within_family_smoothing_changes = 0
|
|
566
|
-
|
|
635
|
+
|
|
567
636
|
# min_detections_to_overwrite_secondary_same_family = -1
|
|
568
637
|
# max_detections_nondominant_class_same_family = 1
|
|
569
638
|
family_level = taxonomy_level_string_to_index('family')
|
|
570
|
-
|
|
639
|
+
|
|
571
640
|
if process_taxonomic_rules:
|
|
572
|
-
|
|
641
|
+
|
|
573
642
|
category_description_most_common_category = \
|
|
574
643
|
classification_descriptions[most_common_category]
|
|
575
644
|
most_common_category_taxonomic_level = \
|
|
576
|
-
taxonomy_level_index(category_description_most_common_category)
|
|
645
|
+
taxonomy_level_index(category_description_most_common_category)
|
|
577
646
|
n_most_common_category = category_to_count[most_common_category]
|
|
578
647
|
tokens = category_description_most_common_category.split(';')
|
|
579
648
|
assert len(tokens) == 7
|
|
580
649
|
most_common_category_family = tokens[3]
|
|
581
650
|
most_common_category_genus = tokens[4]
|
|
582
|
-
|
|
651
|
+
|
|
583
652
|
# Only consider remapping to genus or species level, and only when we have
|
|
584
653
|
# a high enough count in the most common category
|
|
585
654
|
if process_taxonomic_rules and \
|
|
@@ -587,36 +656,36 @@ def _smooth_classifications_for_list_of_detections(detections,
|
|
|
587
656
|
(most_common_category not in other_category_ids) and \
|
|
588
657
|
(most_common_category_taxonomic_level > family_level) and \
|
|
589
658
|
(n_most_common_category >= options.min_detections_to_overwrite_secondary_same_family):
|
|
590
|
-
|
|
659
|
+
|
|
591
660
|
# det = detections[0]
|
|
592
661
|
for det in detections:
|
|
593
|
-
|
|
662
|
+
|
|
594
663
|
if ('classifications' not in det) or \
|
|
595
664
|
(det['conf'] < options.detection_confidence_threshold):
|
|
596
665
|
continue
|
|
597
|
-
|
|
666
|
+
|
|
598
667
|
assert len(det['classifications']) == 1
|
|
599
668
|
c = det['classifications'][0]
|
|
600
|
-
|
|
669
|
+
|
|
601
670
|
# Don't over-write the most common category with itself
|
|
602
671
|
if c[0] == most_common_category:
|
|
603
672
|
continue
|
|
604
|
-
|
|
673
|
+
|
|
605
674
|
# Don't bother with below-threshold classifications
|
|
606
675
|
if c[1] < options.classification_confidence_threshold:
|
|
607
|
-
continue
|
|
608
|
-
|
|
676
|
+
continue
|
|
677
|
+
|
|
609
678
|
n_candidate_flip_category = category_to_count[c[0]]
|
|
610
|
-
|
|
679
|
+
|
|
611
680
|
# Do we have too many of the non-dominant category to do this kind of swap?
|
|
612
681
|
if n_candidate_flip_category > \
|
|
613
682
|
options.max_detections_nondominant_class_same_family:
|
|
614
683
|
continue
|
|
615
684
|
|
|
616
|
-
# Don't flip classes when it's a tie
|
|
685
|
+
# Don't flip classes when it's a tie
|
|
617
686
|
if n_candidate_flip_category == n_most_common_category:
|
|
618
687
|
continue
|
|
619
|
-
|
|
688
|
+
|
|
620
689
|
category_description_candidate_flip = \
|
|
621
690
|
classification_descriptions[c[0]]
|
|
622
691
|
tokens = category_description_candidate_flip.split(';')
|
|
@@ -624,34 +693,34 @@ def _smooth_classifications_for_list_of_detections(detections,
|
|
|
624
693
|
candidate_flip_category_family = tokens[3]
|
|
625
694
|
candidate_flip_category_genus = tokens[4]
|
|
626
695
|
candidate_flip_category_taxonomic_level = \
|
|
627
|
-
taxonomy_level_index(category_description_candidate_flip)
|
|
628
|
-
|
|
696
|
+
taxonomy_level_index(category_description_candidate_flip)
|
|
697
|
+
|
|
629
698
|
# Only proceed if we have valid family strings
|
|
630
699
|
if (len(candidate_flip_category_family) == 0) or \
|
|
631
700
|
(len(most_common_category_family) == 0):
|
|
632
701
|
continue
|
|
633
|
-
|
|
634
|
-
# Only proceed if the candidate and the most common category are in the same family
|
|
702
|
+
|
|
703
|
+
# Only proceed if the candidate and the most common category are in the same family
|
|
635
704
|
if candidate_flip_category_family != most_common_category_family:
|
|
636
705
|
continue
|
|
637
|
-
|
|
706
|
+
|
|
638
707
|
# Don't flip from a species to the genus level in the same genus
|
|
639
708
|
if (candidate_flip_category_genus == most_common_category_genus) and \
|
|
640
709
|
(candidate_flip_category_taxonomic_level > \
|
|
641
710
|
most_common_category_taxonomic_level):
|
|
642
711
|
continue
|
|
643
|
-
|
|
712
|
+
|
|
644
713
|
old_category_name = classification_descriptions_clean[c[0]]
|
|
645
714
|
new_category_name = classification_descriptions_clean[most_common_category]
|
|
646
|
-
|
|
715
|
+
|
|
647
716
|
c[0] = most_common_category
|
|
648
|
-
n_within_family_smoothing_changes += 1
|
|
649
|
-
|
|
717
|
+
n_within_family_smoothing_changes += 1
|
|
718
|
+
|
|
650
719
|
# ...for each detection
|
|
651
|
-
|
|
720
|
+
|
|
652
721
|
# ...if the dominant category is legit and we have taxonomic information available
|
|
653
|
-
|
|
654
|
-
|
|
722
|
+
|
|
723
|
+
|
|
655
724
|
return {'n_other_classifications_changed_this_image':n_other_classifications_changed_this_image,
|
|
656
725
|
'n_detections_flipped_this_image':n_detections_flipped_this_image,
|
|
657
726
|
'n_taxonomic_changes_this_image':n_taxonomic_changes_this_image,
|
|
@@ -668,33 +737,33 @@ def _smooth_single_image(im,
|
|
|
668
737
|
"""
|
|
669
738
|
Smooth classifications for a single image. Returns None if no changes are made,
|
|
670
739
|
else a dict.
|
|
671
|
-
|
|
672
|
-
classification_descriptions_clean should be semicolon-delimited taxonomic strings
|
|
740
|
+
|
|
741
|
+
classification_descriptions_clean should be semicolon-delimited taxonomic strings
|
|
673
742
|
from which common names and GUIDs have already been removed.
|
|
674
|
-
|
|
743
|
+
|
|
675
744
|
Assumes there is only one classification per detection, i.e. that non-top classifications
|
|
676
745
|
have already been remoevd.
|
|
677
746
|
"""
|
|
678
|
-
|
|
747
|
+
|
|
679
748
|
if 'detections' not in im or im['detections'] is None or len(im['detections']) == 0:
|
|
680
749
|
return
|
|
681
|
-
|
|
750
|
+
|
|
682
751
|
detections = im['detections']
|
|
683
|
-
|
|
752
|
+
|
|
684
753
|
# Simplify debugging
|
|
685
754
|
for det in detections:
|
|
686
755
|
det['image_filename'] = im['file']
|
|
687
|
-
|
|
688
|
-
to_return = _smooth_classifications_for_list_of_detections(detections,
|
|
689
|
-
options=options,
|
|
756
|
+
|
|
757
|
+
to_return = _smooth_classifications_for_list_of_detections(detections,
|
|
758
|
+
options=options,
|
|
690
759
|
other_category_ids=other_category_ids,
|
|
691
|
-
classification_descriptions=classification_descriptions,
|
|
760
|
+
classification_descriptions=classification_descriptions,
|
|
692
761
|
classification_descriptions_clean=classification_descriptions_clean)
|
|
693
762
|
|
|
694
763
|
# Clean out debug information
|
|
695
764
|
for det in detections:
|
|
696
765
|
del det['image_filename']
|
|
697
|
-
|
|
766
|
+
|
|
698
767
|
return to_return
|
|
699
768
|
|
|
700
769
|
# ...def smooth_single_image
|
|
@@ -706,104 +775,104 @@ def smooth_classification_results_image_level(input_file,output_file=None,option
|
|
|
706
775
|
"""
|
|
707
776
|
Smooth classifications at the image level for all results in the MD-formatted results
|
|
708
777
|
file [input_file], optionally writing a new set of results to [output_file].
|
|
709
|
-
|
|
710
|
-
This function generally expresses the notion that an image with 700 cows and one deer
|
|
778
|
+
|
|
779
|
+
This function generally expresses the notion that an image with 700 cows and one deer
|
|
711
780
|
is really just 701 cows.
|
|
712
|
-
|
|
781
|
+
|
|
713
782
|
Only count detections with a classification confidence threshold above
|
|
714
783
|
[options.classification_confidence_threshold], which in practice means we're only
|
|
715
784
|
looking at one category per detection.
|
|
716
|
-
|
|
785
|
+
|
|
717
786
|
If an image has at least [options.min_detections_to_overwrite_secondary] such detections
|
|
718
787
|
     in the most common category, and no more than [options.max_detections_nondominant_class]
     in the second-most-common category, flip all detections to the most common
     category.
-
-    Optionally treat some classes as particularly unreliable, typically used to overwrite an
+
+    Optionally treat some classes as particularly unreliable, typically used to overwrite an
     "other" class.
-
+
     This function also removes everything but the non-dominant classification for each detection.
-
+
     Args:
         input_file (str): MegaDetector-formatted classification results file to smooth. Can
             also be an already-loaded results dict.
         output_file (str, optional): .json file to write smoothed results
-        options (ClassificationSmoothingOptions, optional): see
+        options (ClassificationSmoothingOptions, optional): see
             ClassificationSmoothingOptions for details.
-
+
     Returns:
         dict: MegaDetector-results-formatted dict, identical to what's written to
         [output_file] if [output_file] is not None.
     """
-
+
     ## Input validation
-
+
     if options is None:
         options = ClassificationSmoothingOptions()
-
+
     r = _prepare_results_for_smoothing(input_file, options)
     d = r['d']
     other_category_ids = r['other_category_ids']
     classification_descriptions_clean = r['classification_descriptions_clean']
     classification_descriptions = r['classification_descriptions']
-
-
+
+
     ## Smoothing
-
+
     n_other_classifications_changed = 0
     n_other_images_changed = 0
     n_taxonomic_images_changed = 0
-
+
     n_detections_flipped = 0
     n_images_changed = 0
-    n_taxonomic_classification_changes = 0
-
-    # im = d['images'][0]
+    n_taxonomic_classification_changes = 0
+
+    # im = d['images'][0]
     for im in tqdm(d['images']):
-
+
         r = _smooth_single_image(im,
                                  options,
                                  other_category_ids,
                                  classification_descriptions=classification_descriptions,
                                  classification_descriptions_clean=classification_descriptions_clean)
-
+
         if r is None:
             continue
-
+
         n_detections_flipped_this_image = r['n_detections_flipped_this_image']
         n_other_classifications_changed_this_image = \
             r['n_other_classifications_changed_this_image']
         n_taxonomic_changes_this_image = r['n_taxonomic_changes_this_image']
-
+
         n_detections_flipped += n_detections_flipped_this_image
         n_other_classifications_changed += n_other_classifications_changed_this_image
         n_taxonomic_classification_changes += n_taxonomic_changes_this_image
-
+
         if n_detections_flipped_this_image > 0:
             n_images_changed += 1
         if n_other_classifications_changed_this_image > 0:
             n_other_images_changed += 1
         if n_taxonomic_changes_this_image > 0:
             n_taxonomic_images_changed += 1
-
-    # ...for each image
-
+
+    # ...for each image
+
     print('Classification smoothing: changed {} detections on {} images'.format(
         n_detections_flipped,n_images_changed))
-
+
     print('"Other" smoothing: changed {} detections on {} images'.format(
         n_other_classifications_changed,n_other_images_changed))
-
+
     print('Taxonomic smoothing: changed {} detections on {} images'.format(
         n_taxonomic_classification_changes,n_taxonomic_images_changed))
-
-
+
+
     ## Write output
-
-    if output_file is not None:
+
+    if output_file is not None:
         print('Writing results after image-level smoothing to:\n{}'.format(output_file))
         with open(output_file,'w') as f:
-            json.dump(d,f,indent=1)
+            json.dump(d,f,indent=1)

     return d

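For orientation, image-level smoothing is typically run on an existing MegaDetector/SpeciesNet results file. A minimal sketch, assuming the code lives in megadetector.postprocessing.classification_postprocessing as in previous releases; the file names and the threshold value are hypothetical, and max_detections_nondominant_class is the option referenced in the docstring above:

    from megadetector.postprocessing.classification_postprocessing import \
        ClassificationSmoothingOptions, smooth_classification_results_image_level

    options = ClassificationSmoothingOptions()
    # Illustrative value only; see ClassificationSmoothingOptions for the real default
    options.max_detections_nondominant_class = 1

    # Returns the smoothed dict and also writes it to output_file
    smoothed_results = smooth_classification_results_image_level(
        input_file='results-with-classifications.json',
        output_file='results-with-classifications.image-smoothed.json',
        options=options)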
@@ -811,7 +880,7 @@ def smooth_classification_results_image_level(input_file,output_file=None,option


 #%% Sequence-level smoothing
-
+
 def smooth_classification_results_sequence_level(input_file,
                                                  cct_sequence_information,
                                                  output_file=None,
@@ -819,10 +888,10 @@ def smooth_classification_results_sequence_level(input_file,
     """
     Smooth classifications at the sequence level for all results in the MD-formatted results
     file [md_results_file], optionally writing a new set of results to [output_file].
-
+
     This function generally expresses the notion that a sequence that looks like
     deer/deer/deer/elk/deer/deer/deer/deer is really just a deer.
-
+
     Args:
         input_file (str or dict): MegaDetector-formatted classification results file to smooth
             (or already-loaded results). If you supply a dict, it's modified in place by default, but
@@ -830,28 +899,28 @@ def smooth_classification_results_sequence_level(input_file,
         cct_sequence_information (str, dict, or list): COCO Camera Traps file containing sequence IDs for
             each image (or an already-loaded CCT-formatted dict, or just the 'images' list from a CCT dict).
         output_file (str, optional): .json file to write smoothed results
-        options (ClassificationSmoothingOptions, optional): see
+        options (ClassificationSmoothingOptions, optional): see
            ClassificationSmoothingOptions for details.
-
+
     Returns:
         dict: MegaDetector-results-formatted dict, identical to what's written to
         [output_file] if [output_file] is not None.
     """
-
+
     ## Input validation
-
+
     if options is None:
         options = ClassificationSmoothingOptions()
-
+
     r = _prepare_results_for_smoothing(input_file, options)
     d = r['d']
     other_category_ids = r['other_category_ids']
     classification_descriptions_clean = r['classification_descriptions_clean']
     classification_descriptions = r['classification_descriptions']
-
-
+
+
     ## Make a list of images appearing in each sequence
-
+
     if isinstance(cct_sequence_information,list):
         image_info = cct_sequence_information
     elif isinstance(cct_sequence_information,str):
@@ -862,77 +931,77 @@ def smooth_classification_results_sequence_level(input_file,
     else:
         assert isinstance(cct_sequence_information,dict)
         image_info = cct_sequence_information['images']
-
+
     sequence_to_image_filenames = defaultdict(list)
-
+
     # im = image_info[0]
     for im in tqdm(image_info):
-        sequence_to_image_filenames[im['seq_id']].append(im['file_name'])
+        sequence_to_image_filenames[im['seq_id']].append(im['file_name'])
     del image_info
-
+
     image_fn_to_classification_results = {}
     for im in d['images']:
         fn = im['file']
         assert fn not in image_fn_to_classification_results
         image_fn_to_classification_results[fn] = im
-
-
+
+
     ## Smoothing
-
+
     n_other_classifications_changed = 0
     n_other_sequences_changed = 0
     n_taxonomic_sequences_changed = 0
     n_within_family_sequences_changed = 0
-
+
     n_detections_flipped = 0
     n_sequences_changed = 0
-    n_taxonomic_classification_changes = 0
-    n_within_family_changes = 0
-
+    n_taxonomic_classification_changes = 0
+    n_within_family_changes = 0
+
     # sequence_id = list(sequence_to_image_filenames.keys())[0]
     for sequence_id in sequence_to_image_filenames.keys():

         image_filenames_this_sequence = sequence_to_image_filenames[sequence_id]
-
+
         # if 'file' in image_filenames_this_sequence:
-        #     import
-
+        #     from IPython import embed; embed()
+
         detections_this_sequence = []
         for image_filename in image_filenames_this_sequence:
             im = image_fn_to_classification_results[image_filename]
             if 'detections' not in im or im['detections'] is None:
                 continue
             detections_this_sequence.extend(im['detections'])
-
+
             # Temporarily add image filenames to every detection,
             # for debugging
             for det in im['detections']:
                 det['image_filename'] = im['file']
-
+
         if len(detections_this_sequence) == 0:
             continue
-
+
         r = _smooth_classifications_for_list_of_detections(
-            detections=detections_this_sequence,
-            options=options,
+            detections=detections_this_sequence,
+            options=options,
             other_category_ids=other_category_ids,
-            classification_descriptions=classification_descriptions,
+            classification_descriptions=classification_descriptions,
             classification_descriptions_clean=classification_descriptions_clean)
-
+
         if r is None:
             continue
-
+
         n_detections_flipped_this_sequence = r['n_detections_flipped_this_image']
         n_other_classifications_changed_this_sequence = \
             r['n_other_classifications_changed_this_image']
         n_taxonomic_changes_this_sequence = r['n_taxonomic_changes_this_image']
         n_within_family_changes_this_sequence = r['n_within_family_smoothing_changes']
-
+
         n_detections_flipped += n_detections_flipped_this_sequence
         n_other_classifications_changed += n_other_classifications_changed_this_sequence
         n_taxonomic_classification_changes += n_taxonomic_changes_this_sequence
         n_within_family_changes += n_within_family_changes_this_sequence
-
+
         if n_detections_flipped_this_sequence > 0:
             n_sequences_changed += 1
         if n_other_classifications_changed_this_sequence > 0:
@@ -941,40 +1010,468 @@ def smooth_classification_results_sequence_level(input_file,
             n_taxonomic_sequences_changed += 1
         if n_within_family_changes_this_sequence > 0:
             n_within_family_sequences_changed += 1
-
+
     # ...for each sequence
-
+
     print('Classification smoothing: changed {} detections in {} sequences'.format(
         n_detections_flipped,n_sequences_changed))
-
+
     print('"Other" smoothing: changed {} detections in {} sequences'.format(
         n_other_classifications_changed,n_other_sequences_changed))
-
+
     print('Taxonomic smoothing: changed {} detections in {} sequences'.format(
         n_taxonomic_classification_changes,n_taxonomic_sequences_changed))

     print('Within-family smoothing: changed {} detections in {} sequences'.format(
         n_within_family_changes,n_within_family_sequences_changed))
-
-
+
+
     ## Clean up debug information
-
+
     for im in d['images']:
         if 'detections' not in im or im['detections'] is None:
             continue
         for det in im['detections']:
             if 'image_filename' in det:
                 del det['image_filename']
-
+

     ## Write output
-
-    if output_file is not None:
+
+    if output_file is not None:
         print('Writing sequence-smoothed classification results to {}'.format(
-            output_file))
+            output_file))
         with open(output_file,'w') as f:
             json.dump(d,f,indent=1)
-
+
     return d

 # ...smooth_classification_results_sequence_level(...)
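Sequence-level smoothing additionally needs sequence assignments, supplied as a COCO Camera Traps file, an already-loaded CCT dict, or just its 'images' list. A minimal sketch, assuming the same module as above; the file names and sequence IDs are hypothetical, and only 'file_name' and 'seq_id' are read from each image record, per the loop in this diff:

    from megadetector.postprocessing.classification_postprocessing import \
        ClassificationSmoothingOptions, smooth_classification_results_sequence_level

    # Minimal stand-in for CCT sequence information: one record per image
    cct_sequence_information = [
        {'file_name': 'camera01/image001.jpg', 'seq_id': 'seq_0001'},
        {'file_name': 'camera01/image002.jpg', 'seq_id': 'seq_0001'},
        {'file_name': 'camera01/image003.jpg', 'seq_id': 'seq_0002'}
    ]

    smoothed_results = smooth_classification_results_sequence_level(
        input_file='results-with-classifications.json',
        cct_sequence_information=cct_sequence_information,
        output_file='results-with-classifications.seq-smoothed.json',
        options=ClassificationSmoothingOptions())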
+
+
+def restrict_to_taxa_list(taxa_list,
+                          speciesnet_taxonomy_file,
+                          input_file,
+                          output_file,
+                          allow_walk_down=False,
+                          add_pre_filtering_description=True):
+    """
+    Given a prediction file in MD .json format, likely without having had
+    a geofence applied, apply a custom taxa list.
+
+    Args:
+        taxa_list (str or list): list of latin names, or a text file containing
+            a list of latin names. Optionally may contain a second (comma-delimited)
+            column containing common names, used only for debugging. Latin names
+            must exist in the SpeciesNet taxonomy.
+        speciesnet_taxonomy_file (str): taxonomy filename, in the same format used for
+            model release (with 7-token taxonomy entries)
+        input_file (str): .json file to read, in MD format. This can be None, in which
+            case this function just validates [taxa_list].
+        output_file (str): .json file to write, in MD format
+        allow_walk_down (bool, optional): should we walk down the taxonomy tree
+            when making mappings if a parent has only a single allowable child?
+            For example, if only a single felid species is allowed, should other
+            felid predictions be mapped to that species, as opposed to being mapped
+            to the family?
+        add_pre_restriction_description (bool, optional): should we add a new metadata
+            field that summarizes each image's classifications prior to taxonomic
+            restriction?
+    """
+
+    ##%% Read target taxa list
+
+    if isinstance(taxa_list,str):
+        assert os.path.isfile(taxa_list), \
+            'Could not find taxa list file {}'.format(taxa_list)
+        with open(taxa_list,'r') as f:
+            taxa_list = f.readlines()
+
+    taxa_list = [s.strip().lower() for s in taxa_list]
+    taxa_list = [s for s in taxa_list if len(s) > 0]
+
+    target_latin_to_common = {}
+    for s in taxa_list:
+        if s.strip().startswith('#'):
+            continue
+        tokens = s.split(',')
+        assert len(tokens) <= 2
+        binomial_name = tokens[0]
+        assert len(binomial_name.split(' ')) in (1,2,3), \
+            'Illegal binomial name in species list: {}'.format(binomial_name)
+        if len(tokens) > 0:
+            common_name = tokens[1].strip().lower()
+        else:
+            common_name = None
+        assert binomial_name not in target_latin_to_common
+        target_latin_to_common[binomial_name] = common_name
+
+
+    ##%% Read taxonomy file
+
+    with open(speciesnet_taxonomy_file,'r') as f:
+        speciesnet_taxonomy_list = f.readlines()
+    speciesnet_taxonomy_list = [s.strip() for s in \
+                                speciesnet_taxonomy_list if len(s.strip()) > 0]
+
+    # Maps the latin name of every taxon to the corresponding full taxon string
+    #
+    # For species, the key is a binomial name
+    speciesnet_latin_name_to_taxon_string = {}
+    speciesnet_common_name_to_taxon_string = {}
+
+    def _insert_taxonomy_string(s):
+
+        tokens = s.split(';')
+        assert len(tokens) == 7
+
+        guid = tokens[0] # noqa
+        class_name = tokens[1]
+        order = tokens[2]
+        family = tokens[3]
+        genus = tokens[4]
+        species = tokens[5]
+        common_name = tokens[6]
+
+        if len(class_name) == 0:
+            assert common_name in ('animal','vehicle','blank')
+            return
+
+        if len(species) > 0:
+            assert all([len(s) > 0 for s in [genus,family,order]])
+            binomial_name = genus + ' ' + species
+            if binomial_name not in speciesnet_latin_name_to_taxon_string:
+                speciesnet_latin_name_to_taxon_string[binomial_name] = s
+        elif len(genus) > 0:
+            assert all([len(s) > 0 for s in [family,order]])
+            if genus not in speciesnet_latin_name_to_taxon_string:
+                speciesnet_latin_name_to_taxon_string[genus] = s
+        elif len(family) > 0:
+            assert len(order) > 0
+            if family not in speciesnet_latin_name_to_taxon_string:
+                speciesnet_latin_name_to_taxon_string[family] = s
+        elif len(order) > 0:
+            if order not in speciesnet_latin_name_to_taxon_string:
+                speciesnet_latin_name_to_taxon_string[order] = s
+        else:
+            if class_name not in speciesnet_latin_name_to_taxon_string:
+                speciesnet_latin_name_to_taxon_string[class_name] = s
+
+        if len(common_name) > 0:
+            if common_name not in speciesnet_common_name_to_taxon_string:
+                speciesnet_common_name_to_taxon_string[common_name] = s
+
+    for s in speciesnet_taxonomy_list:
+
+        _insert_taxonomy_string(s)
+
+
+    ##%% Make sure all parent taxa are represented in the taxonomy
+
+    # In theory any taxon that appears as the parent of another taxon should
+    # also be in the taxonomy, but this isn't always true, so we fix it here.
+
+    new_taxon_string_to_missing_tokens = defaultdict(list)
+
+    # latin_name = next(iter(speciesnet_latin_name_to_taxon_string.keys()))
+    for latin_name in speciesnet_latin_name_to_taxon_string.keys():
+
+        if 'no cv result' in latin_name:
+            continue
+
+        taxon_string = speciesnet_latin_name_to_taxon_string[latin_name]
+        tokens = taxon_string.split(';')
+
+        # Don't process GUID, species, or common name
+        # i_token = 6
+        for i_token in range(1,len(tokens)-2):
+
+            test_token = tokens[i_token]
+            if len(test_token) == 0:
+                continue
+
+            # Do we need to make up a taxon for this token?
+            if test_token not in speciesnet_latin_name_to_taxon_string:
+
+                new_tokens = [''] * 7
+                new_tokens[0] = 'fake_guid'
+                for i_copy_token in range(1,i_token+1):
+                    new_tokens[i_copy_token] = tokens[i_copy_token]
+                new_tokens[-1] = test_token + ' species'
+                assert new_tokens[-2] == ''
+                new_taxon_string = ';'.join(new_tokens)
+                # assert new_taxon_string not in new_taxon_strings
+                new_taxon_string_to_missing_tokens[new_taxon_string].append(test_token)
+
+        # ...for each token
+
+    # ...for each taxon
+
+    print('Found {} taxa that need to be inserted to make the taxonomy valid:\n'.format(
+        len(new_taxon_string_to_missing_tokens)))
+
+    new_taxon_string_to_missing_tokens = \
+        sort_dictionary_by_key(new_taxon_string_to_missing_tokens)
+    for taxon_string in new_taxon_string_to_missing_tokens:
+        missing_taxa = ','.join(new_taxon_string_to_missing_tokens[taxon_string])
+        print('{} ({})'.format(taxon_string,missing_taxa))
+
+    for new_taxon_string in new_taxon_string_to_missing_tokens:
+        _insert_taxonomy_string(new_taxon_string)
+
+
+    ##%% Make sure all species on the allow-list are in the taxonomy
+
+    n_failed_mappings = 0
+
+    for target_taxon_latin_name in target_latin_to_common.keys():
+        if target_taxon_latin_name not in speciesnet_latin_name_to_taxon_string:
+            common_name = target_latin_to_common[target_taxon_latin_name]
+            s = '{} ({}) not in speciesnet taxonomy'.format(
+                target_taxon_latin_name,common_name)
+            if common_name in speciesnet_common_name_to_taxon_string:
+                s += ' (common name maps to {})'.format(
+                    speciesnet_common_name_to_taxon_string[common_name])
+            print(s)
+            n_failed_mappings += 1
+
+    if n_failed_mappings > 0:
+        raise ValueError('Cannot continue with geofence generation')
+
+
+    ##%% For the allow-list, map each parent taxon to a set of allowable child taxa
+
+    # Maps parent names to all allowed child names, or None if this is the
+    # lowest-level allowable taxon on this path
+    allowed_parent_taxon_to_child_taxa = defaultdict(set)
+
+    # latin_name = next(iter(target_latin_to_common.keys()))
+    for latin_name in target_latin_to_common:
+
+        taxon_string = speciesnet_latin_name_to_taxon_string[latin_name]
+        tokens = taxon_string.split(';')
+        assert len(tokens) == 7
+
+        # Remove GUID and common mame
+        #
+        # This is now always class/order/family/genus/species
+        tokens = tokens[1:-1]
+
+        child_taxon = None
+
+        # If this is a species
+        if len(tokens[-1]) > 0:
+            binomial_name = tokens[-2] + ' ' + tokens[-1]
+            assert binomial_name == latin_name
+            allowed_parent_taxon_to_child_taxa[binomial_name].add(None)
+            child_taxon = binomial_name
+
+        # The first candidate parent is the genus
+        parent_token_index = len(tokens) - 2
+
+        while(parent_token_index >= 0):
+
+            parent_taxon = tokens[parent_token_index]
+            allowed_parent_taxon_to_child_taxa[parent_taxon].add(child_taxon)
+            child_taxon = parent_taxon
+            parent_token_index -= 1
+
+    # ...for each allowed latin name
+
+    allowed_parent_taxon_to_child_taxa = \
+        sort_dictionary_by_key(allowed_parent_taxon_to_child_taxa)
+
+
+    ##%% If we were just validating the custom taxa file, we're done
+
+    if input_file is None:
+        print('Finished validating custom taxonomy list')
+        return
+
+
+    ##%% Map all predictions that exist in this dataset...
+
+    # ...to the prediction we should generate.
+
+    with open(input_file,'r') as f:
+        input_data = json.load(f)
+
+    input_category_id_to_common_name = input_data['classification_categories'] #noqa
+    input_category_id_to_taxonomy_string = \
+        input_data['classification_category_descriptions']
+
+    input_category_id_to_output_taxon_string = {}
+
+    # input_category_id = next(iter(input_category_id_to_taxonomy_string.keys()))
+    for input_category_id in input_category_id_to_taxonomy_string.keys():
+
+        input_taxon_string = input_category_id_to_taxonomy_string[input_category_id]
+        input_taxon_tokens = input_taxon_string.split(';')
+        assert len(input_taxon_tokens) == 7
+
+        # Don't mess with blank/no-cv-result/animal/human
+        if (input_taxon_string in non_taxonomic_prediction_strings) or \
+           (input_taxon_string == human_prediction_string):
+            input_category_id_to_output_taxon_string[input_category_id] = \
+                input_taxon_string
+            continue
+
+        # Remove GUID and common mame
+
+        # This is now always class/order/family/genus/species
+        input_taxon_tokens = input_taxon_tokens[1:-1]
+
+        test_index = len(input_taxon_tokens) - 1
+        target_taxon = None
+
+        # Start at the species level, and see whether each taxon is allowed
+        while((test_index >= 0) and (target_taxon is None)):
+
+            # Species are represented as binomial names
+            if (test_index == (len(input_taxon_tokens) - 1)) and \
+               (len(input_taxon_tokens[-1]) > 0):
+                test_taxon_name = \
+                    input_taxon_tokens[-2] + ' ' + input_taxon_tokens[-1]
+            else:
+                test_taxon_name = input_taxon_tokens[test_index]
+
+            # If we haven't yet found the level at which this taxon is non-empty,
+            # keep going up
+            if len(test_taxon_name) == 0:
+                test_index -= 1
+                continue
+
+            assert test_taxon_name in speciesnet_latin_name_to_taxon_string
+
+            # Is this taxon allowed according to the custom species list?
+            if test_taxon_name in allowed_parent_taxon_to_child_taxa:
+
+                allowed_child_taxa = allowed_parent_taxon_to_child_taxa[test_taxon_name]
+                assert allowed_child_taxa is not None
+
+                # If this is the lowest-level allowable token or there is not a
+                # unique child, don't walk any further, even if walking down
+                # is enabled.
+                if (None in allowed_child_taxa):
+                    assert len(allowed_child_taxa) == 1
+
+                if (None in allowed_child_taxa) or (len(allowed_child_taxa) > 1):
+                    target_taxon = test_taxon_name
+                elif not allow_walk_down:
+                    target_taxon = test_taxon_name
+                else:
+                    # If there's a unique child, walk back *down* the allowable
+                    # taxa until we run out of unique children
+                    while ((next(iter(allowed_child_taxa)) is not None) and \
+                           (len(allowed_child_taxa) == 1)):
+                        candidate_taxon = next(iter(allowed_child_taxa))
+                        assert candidate_taxon in allowed_parent_taxon_to_child_taxa
+                        assert candidate_taxon in speciesnet_latin_name_to_taxon_string
+                        allowed_child_taxa = \
+                            allowed_parent_taxon_to_child_taxa[candidate_taxon]
+                        target_taxon = candidate_taxon
+
+            # ...if this is an allowed taxon
+
+            test_index -= 1
+
+        # ...for each token
+
+        if target_taxon is None:
+            output_taxon_string = animal_prediction_string
+        else:
+            output_taxon_string = speciesnet_latin_name_to_taxon_string[target_taxon]
+        input_category_id_to_output_taxon_string[input_category_id] = output_taxon_string
+
+    # ...for each category
+
+
+    ##%% Build the new tables
+
+    input_category_id_to_output_category_id = {}
+    output_taxon_string_to_category_id = {}
+    output_category_id_to_common_name = {}
+
+    for input_category_id in input_category_id_to_output_taxon_string:
+
+        original_common_name = \
+            input_category_id_to_common_name[input_category_id]
+        original_taxon_string = \
+            input_category_id_to_taxonomy_string[input_category_id]
+        output_taxon_string = \
+            input_category_id_to_output_taxon_string[input_category_id]
+
+        output_common_name = output_taxon_string.split(';')[-1]
+
+        # Do we need to create a new output category?
+        if output_taxon_string not in output_taxon_string_to_category_id:
+            output_category_id = str(len(output_taxon_string_to_category_id))
+            output_taxon_string_to_category_id[output_taxon_string] = \
+                output_category_id
+            output_category_id_to_common_name[output_category_id] = \
+                output_common_name
+        else:
+            output_category_id = \
+                output_taxon_string_to_category_id[output_taxon_string]
+
+        input_category_id_to_output_category_id[input_category_id] = \
+            output_category_id
+
+        if False:
+            print('Mapping {} ({}) to:\n{} ({})\n'.format(
+                original_common_name,original_taxon_string,
+                output_common_name,output_taxon_string))
+        if False:
+            print('Mapping {} to {}'.format(
+                original_common_name,output_common_name,))
+
+    # ...for each category
+
+
+    ##%% Remap all category labels
+
+    assert len(set(output_taxon_string_to_category_id.keys())) == \
+           len(set(output_taxon_string_to_category_id.values()))
+
+    output_category_id_to_taxon_string = \
+        invert_dictionary(output_taxon_string_to_category_id)
+
+    with open(input_file,'r') as f:
+        output_data = json.load(f)
+
+    classification_descriptions = None
+    if 'classification_category_descriptions' in output_data:
+        classification_descriptions = output_data['classification_category_descriptions']
+
+    for im in tqdm(output_data['images']):
+
+        if 'detections' not in im or im['detections'] is None:
+            continue
+
+        # Possibly prepare a pre-filtering description
+        pre_filtering_description = None
+        if classification_descriptions is not None and add_pre_filtering_description:
+            category_to_count = count_detections_by_classification_category(im['detections'])
+            pre_filtering_description = \
+                get_classification_description_string(category_to_count,classification_descriptions)
+            im['pre_filtering_description'] = pre_filtering_description
+
+        for det in im['detections']:
+            if 'classifications' in det:
+                for classification in det['classifications']:
+                    classification[0] = \
+                        input_category_id_to_output_category_id[classification[0]]
+
+    # ...for each image
+
+    output_data['classification_categories'] = output_category_id_to_common_name
+    output_data['classification_category_descriptions'] = \
+        output_category_id_to_taxon_string
+
+
+    ##%% Write output
+
+    with open(output_file,'w') as f:
+        json.dump(output_data,f,indent=1)
+
+# ...def restrict_to_taxa_list(...)
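The new restrict_to_taxa_list function takes a plain-text allow-list (one latin name per line, optional comma-delimited common name, '#' for comments) and the SpeciesNet taxonomy release file (semicolon-delimited, seven tokens per line: GUID;class;order;family;genus;species;common name, per the parsing above). A minimal sketch, assuming the function is importable from the same module as the smoothing functions; all file names and taxa below are hypothetical, and passing input_file=None only validates the list, as the docstring describes:

    from megadetector.postprocessing.classification_postprocessing import restrict_to_taxa_list

    # Hypothetical allow-list, e.g. the contents of allowed_taxa.txt:
    #
    #   # species allowed in this project
    #   odocoileus virginianus,white-tailed deer
    #   canis latrans,coyote
    #   meleagris gallopavo,wild turkey

    # Validate the list against the taxonomy without touching any results
    restrict_to_taxa_list(taxa_list='allowed_taxa.txt',
                          speciesnet_taxonomy_file='speciesnet_taxonomy.txt',
                          input_file=None,
                          output_file=None)

    # Apply the list to a results file; predictions not on the list are mapped to an
    # allowed ancestor (or descendant, if allow_walk_down=True), else to 'animal'
    restrict_to_taxa_list(taxa_list='allowed_taxa.txt',
                          speciesnet_taxonomy_file='speciesnet_taxonomy.txt',
                          input_file='results-with-classifications.json',
                          output_file='results-taxa-restricted.json',
                          allow_walk_down=False)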