megadetector 5.0.28-py3-none-any.whl → 5.0.29-py3-none-any.whl
- megadetector/api/batch_processing/api_core/batch_service/score.py +4 -5
- megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +1 -1
- megadetector/api/batch_processing/api_support/summarize_daily_activity.py +1 -1
- megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
- megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
- megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
- megadetector/api/synchronous/api_core/tests/load_test.py +2 -3
- megadetector/classification/aggregate_classifier_probs.py +3 -3
- megadetector/classification/analyze_failed_images.py +5 -5
- megadetector/classification/cache_batchapi_outputs.py +5 -5
- megadetector/classification/create_classification_dataset.py +11 -12
- megadetector/classification/crop_detections.py +10 -10
- megadetector/classification/csv_to_json.py +8 -8
- megadetector/classification/detect_and_crop.py +13 -15
- megadetector/classification/evaluate_model.py +7 -7
- megadetector/classification/identify_mislabeled_candidates.py +6 -6
- megadetector/classification/json_to_azcopy_list.py +1 -1
- megadetector/classification/json_validator.py +29 -32
- megadetector/classification/map_classification_categories.py +9 -9
- megadetector/classification/merge_classification_detection_output.py +12 -9
- megadetector/classification/prepare_classification_script.py +19 -19
- megadetector/classification/prepare_classification_script_mc.py +23 -23
- megadetector/classification/run_classifier.py +4 -4
- megadetector/classification/save_mislabeled.py +6 -6
- megadetector/classification/train_classifier.py +1 -1
- megadetector/classification/train_classifier_tf.py +9 -9
- megadetector/classification/train_utils.py +10 -10
- megadetector/data_management/annotations/annotation_constants.py +1 -1
- megadetector/data_management/camtrap_dp_to_coco.py +45 -45
- megadetector/data_management/cct_json_utils.py +101 -101
- megadetector/data_management/cct_to_md.py +49 -49
- megadetector/data_management/cct_to_wi.py +33 -33
- megadetector/data_management/coco_to_labelme.py +75 -75
- megadetector/data_management/coco_to_yolo.py +189 -189
- megadetector/data_management/databases/add_width_and_height_to_db.py +3 -2
- megadetector/data_management/databases/combine_coco_camera_traps_files.py +38 -38
- megadetector/data_management/databases/integrity_check_json_db.py +202 -188
- megadetector/data_management/databases/subset_json_db.py +33 -33
- megadetector/data_management/generate_crops_from_cct.py +38 -38
- megadetector/data_management/get_image_sizes.py +54 -49
- megadetector/data_management/labelme_to_coco.py +130 -124
- megadetector/data_management/labelme_to_yolo.py +78 -72
- megadetector/data_management/lila/create_lila_blank_set.py +81 -83
- megadetector/data_management/lila/create_lila_test_set.py +32 -31
- megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
- megadetector/data_management/lila/download_lila_subset.py +21 -24
- megadetector/data_management/lila/generate_lila_per_image_labels.py +91 -91
- megadetector/data_management/lila/get_lila_annotation_counts.py +30 -30
- megadetector/data_management/lila/get_lila_image_counts.py +22 -22
- megadetector/data_management/lila/lila_common.py +70 -70
- megadetector/data_management/lila/test_lila_metadata_urls.py +13 -14
- megadetector/data_management/mewc_to_md.py +339 -340
- megadetector/data_management/ocr_tools.py +258 -252
- megadetector/data_management/read_exif.py +231 -224
- megadetector/data_management/remap_coco_categories.py +26 -26
- megadetector/data_management/remove_exif.py +31 -20
- megadetector/data_management/rename_images.py +187 -187
- megadetector/data_management/resize_coco_dataset.py +41 -41
- megadetector/data_management/speciesnet_to_md.py +41 -41
- megadetector/data_management/wi_download_csv_to_coco.py +55 -55
- megadetector/data_management/yolo_output_to_md_output.py +117 -120
- megadetector/data_management/yolo_to_coco.py +195 -188
- megadetector/detection/change_detection.py +831 -0
- megadetector/detection/process_video.py +340 -337
- megadetector/detection/pytorch_detector.py +304 -262
- megadetector/detection/run_detector.py +177 -164
- megadetector/detection/run_detector_batch.py +364 -363
- megadetector/detection/run_inference_with_yolov5_val.py +328 -325
- megadetector/detection/run_tiled_inference.py +256 -249
- megadetector/detection/tf_detector.py +24 -24
- megadetector/detection/video_utils.py +290 -282
- megadetector/postprocessing/add_max_conf.py +15 -11
- megadetector/postprocessing/categorize_detections_by_size.py +44 -44
- megadetector/postprocessing/classification_postprocessing.py +415 -415
- megadetector/postprocessing/combine_batch_outputs.py +20 -21
- megadetector/postprocessing/compare_batch_results.py +528 -517
- megadetector/postprocessing/convert_output_format.py +97 -97
- megadetector/postprocessing/create_crop_folder.py +219 -146
- megadetector/postprocessing/detector_calibration.py +173 -168
- megadetector/postprocessing/generate_csv_report.py +508 -499
- megadetector/postprocessing/load_api_results.py +23 -20
- megadetector/postprocessing/md_to_coco.py +129 -98
- megadetector/postprocessing/md_to_labelme.py +89 -83
- megadetector/postprocessing/md_to_wi.py +40 -40
- megadetector/postprocessing/merge_detections.py +87 -114
- megadetector/postprocessing/postprocess_batch_results.py +313 -298
- megadetector/postprocessing/remap_detection_categories.py +36 -36
- megadetector/postprocessing/render_detection_confusion_matrix.py +205 -199
- megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
- megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +702 -677
- megadetector/postprocessing/separate_detections_into_folders.py +226 -211
- megadetector/postprocessing/subset_json_detector_output.py +265 -262
- megadetector/postprocessing/top_folders_to_bottom.py +45 -45
- megadetector/postprocessing/validate_batch_results.py +70 -70
- megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +15 -15
- megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +14 -14
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +66 -66
- megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
- megadetector/taxonomy_mapping/simple_image_download.py +8 -8
- megadetector/taxonomy_mapping/species_lookup.py +33 -33
- megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
- megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
- megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
- megadetector/utils/azure_utils.py +22 -22
- megadetector/utils/ct_utils.py +1018 -200
- megadetector/utils/directory_listing.py +21 -77
- megadetector/utils/gpu_test.py +22 -22
- megadetector/utils/md_tests.py +541 -518
- megadetector/utils/path_utils.py +1457 -398
- megadetector/utils/process_utils.py +41 -41
- megadetector/utils/sas_blob_utils.py +53 -49
- megadetector/utils/split_locations_into_train_val.py +61 -61
- megadetector/utils/string_utils.py +147 -26
- megadetector/utils/url_utils.py +463 -173
- megadetector/utils/wi_utils.py +2629 -2526
- megadetector/utils/write_html_image_list.py +137 -137
- megadetector/visualization/plot_utils.py +21 -21
- megadetector/visualization/render_images_with_thumbnails.py +37 -73
- megadetector/visualization/visualization_utils.py +401 -397
- megadetector/visualization/visualize_db.py +197 -190
- megadetector/visualization/visualize_detector_output.py +79 -73
- {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/METADATA +135 -132
- megadetector-5.0.29.dist-info/RECORD +163 -0
- {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/WHEEL +1 -1
- {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/licenses/LICENSE +0 -0
- {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/top_level.txt +0 -0
- megadetector/data_management/importers/add_nacti_sizes.py +0 -52
- megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
- megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
- megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
- megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
- megadetector/data_management/importers/awc_to_json.py +0 -191
- megadetector/data_management/importers/bellevue_to_json.py +0 -272
- megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
- megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
- megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
- megadetector/data_management/importers/cct_field_adjustments.py +0 -58
- megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
- megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
- megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
- megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
- megadetector/data_management/importers/ena24_to_json.py +0 -276
- megadetector/data_management/importers/filenames_to_json.py +0 -386
- megadetector/data_management/importers/helena_to_cct.py +0 -283
- megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
- megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
- megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
- megadetector/data_management/importers/jb_csv_to_json.py +0 -150
- megadetector/data_management/importers/mcgill_to_json.py +0 -250
- megadetector/data_management/importers/missouri_to_json.py +0 -490
- megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
- megadetector/data_management/importers/noaa_seals_2019.py +0 -181
- megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
- megadetector/data_management/importers/pc_to_json.py +0 -365
- megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
- megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
- megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
- megadetector/data_management/importers/rspb_to_json.py +0 -356
- megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
- megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
- megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
- megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
- megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
- megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
- megadetector/data_management/importers/sulross_get_exif.py +0 -65
- megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
- megadetector/data_management/importers/ubc_to_json.py +0 -399
- megadetector/data_management/importers/umn_to_json.py +0 -507
- megadetector/data_management/importers/wellington_to_json.py +0 -263
- megadetector/data_management/importers/wi_to_json.py +0 -442
- megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
- megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
- megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
- megadetector-5.0.28.dist-info/RECORD +0 -209
|
@@ -3,12 +3,12 @@
|
|
|
3
3
|
classification_postprocessing.py
|
|
4
4
|
|
|
5
5
|
Functions for postprocessing species classification results, particularly:
|
|
6
|
-
|
|
6
|
+
|
|
7
7
|
* Smoothing results within an image (an image with 700 cows and one deer is really just 701
|
|
8
8
|
cows)
|
|
9
9
|
* Smoothing results within a sequence (a sequence that looks like deer/deer/deer/elk/deer/deer
|
|
10
10
|
is really just a deer)
|
|
11
|
-
|
|
11
|
+
|
|
12
12
|
"""
|
|
13
13
|
|
|
14
14
|
#%% Constants and imports
|
|
@@ -32,7 +32,7 @@ from megadetector.utils.wi_utils import taxonomy_level_string_to_index
|
|
|
32
32
|
from megadetector.utils.wi_utils import non_taxonomic_prediction_strings
|
|
33
33
|
from megadetector.utils.wi_utils import human_prediction_string
|
|
34
34
|
from megadetector.utils.wi_utils import animal_prediction_string
|
|
35
|
-
from megadetector.utils.wi_utils import blank_prediction_string
|
|
35
|
+
from megadetector.utils.wi_utils import blank_prediction_string # noqa
|
|
36
36
|
|
|
37
37
|
|
|
38
38
|
#%% Options classes
|
|
@@ -44,83 +44,83 @@ class ClassificationSmoothingOptions:
|
|
|
44
44
|
"""
|
|
45
45
|
|
|
46
46
|
def __init__(self):
|
|
47
|
-
|
|
48
|
-
#: How many detections do we need in a dominant category to overwrite
|
|
49
|
-
#: non-dominant classifications? This is irrelevant if
|
|
47
|
+
|
|
48
|
+
#: How many detections do we need in a dominant category to overwrite
|
|
49
|
+
#: non-dominant classifications? This is irrelevant if
|
|
50
50
|
#: max_detections_nondominant_class <= 1.
|
|
51
51
|
self.min_detections_to_overwrite_secondary = 4
|
|
52
|
-
|
|
53
|
-
#: Even if we have a dominant class, if a non-dominant class has at least
|
|
52
|
+
|
|
53
|
+
#: Even if we have a dominant class, if a non-dominant class has at least
|
|
54
54
|
#: this many classifications in an image, leave them alone.
|
|
55
55
|
#:
|
|
56
56
|
#: If this is <= 1, we won't replace non-dominant, non-other classes
|
|
57
57
|
#: with the dominant class, even if there are 900 cows and 1 deer.
|
|
58
58
|
self.max_detections_nondominant_class = 1
|
|
59
|
-
|
|
60
|
-
#: How many detections do we need in a dominant category to overwrite
|
|
61
|
-
#: non-dominant classifications in the same family? If this is <= 0,
|
|
62
|
-
#: we'll skip this step. This option doesn't mean anything if
|
|
59
|
+
|
|
60
|
+
#: How many detections do we need in a dominant category to overwrite
|
|
61
|
+
#: non-dominant classifications in the same family? If this is <= 0,
|
|
62
|
+
#: we'll skip this step. This option doesn't mean anything if
|
|
63
63
|
#: max_detections_nondominant_class_same_family <= 1.
|
|
64
64
|
self.min_detections_to_overwrite_secondary_same_family = 2
|
|
65
|
-
|
|
66
|
-
#: If we have this many classifications of a nondominant category,
|
|
65
|
+
|
|
66
|
+
#: If we have this many classifications of a nondominant category,
|
|
67
67
|
#: we won't do same-family overwrites. <= 1 means "even if there are
|
|
68
68
|
#: a million deer, if there are two million moose, call all the deer
|
|
69
|
-
#: moose". This option doesn't mean anything if
|
|
69
|
+
#: moose". This option doesn't mean anything if
|
|
70
70
|
#: min_detections_to_overwrite_secondary_same_family <= 0.
|
|
71
71
|
self.max_detections_nondominant_class_same_family = -1
|
|
72
|
-
|
|
73
|
-
#: If the dominant class has at least this many classifications, overwrite
|
|
72
|
+
|
|
73
|
+
#: If the dominant class has at least this many classifications, overwrite
|
|
74
74
|
#: "other" classifications with the dominant class
|
|
75
75
|
self.min_detections_to_overwrite_other = 2
|
|
76
|
-
|
|
76
|
+
|
|
77
77
|
#: Names to treat as "other" categories; can't be None, but can be empty
|
|
78
78
|
#:
|
|
79
79
|
#: "Other" classifications will be changed to the dominant category, regardless
|
|
80
|
-
#: of confidence, as long as there are at least min_detections_to_overwrite_other
|
|
80
|
+
#: of confidence, as long as there are at least min_detections_to_overwrite_other
|
|
81
81
|
#: examples of the dominant class. For example, cow/other will remain unchanged,
|
|
82
82
|
#: but cow/cow/other will become cow/cow/cow.
|
|
83
83
|
self.other_category_names = ['other','unknown','no cv result','animal','blank','mammal']
|
|
84
|
-
|
|
84
|
+
|
|
85
85
|
#: We're not even going to mess around with classifications below this threshold.
|
|
86
86
|
#:
|
|
87
87
|
#: We won't count them, we won't over-write them, they don't exist during the
|
|
88
88
|
#: within-image smoothing step.
|
|
89
89
|
self.classification_confidence_threshold = 0.5
|
|
90
|
-
|
|
90
|
+
|
|
91
91
|
#: We're not even going to mess around with detections below this threshold.
|
|
92
92
|
#:
|
|
93
93
|
#: We won't count them, we won't over-write them, they don't exist during the
|
|
94
94
|
#: within-image smoothing step.
|
|
95
95
|
self.detection_confidence_threshold = 0.15
|
|
96
|
-
|
|
96
|
+
|
|
97
97
|
#: If classification descriptions are present and appear to represent taxonomic
|
|
98
|
-
#: information, should we propagate classifications when lower-level taxa are more
|
|
99
|
-
#: common in an image? For example, if we see "carnivore/fox/fox/deer", should
|
|
98
|
+
#: information, should we propagate classifications when lower-level taxa are more
|
|
99
|
+
#: common in an image? For example, if we see "carnivore/fox/fox/deer", should
|
|
100
100
|
#: we make that "fox/fox/fox/deer"?
|
|
101
101
|
self.propagate_classifications_through_taxonomy = True
|
|
102
|
-
|
|
103
|
-
#: When propagating classifications down through taxonomy levels, we have to
|
|
102
|
+
|
|
103
|
+
#: When propagating classifications down through taxonomy levels, we have to
|
|
104
104
|
#: decide whether we prefer more frequent categories or more specific categories.
|
|
105
105
|
#: taxonomy_propagation_level_weight and taxonomy_propagation_count_weight
|
|
106
106
|
#: balance levels against counts in this process.
|
|
107
107
|
self.taxonomy_propagation_level_weight = 1.0
|
|
108
|
-
|
|
109
|
-
#: When propagating classifications down through taxonomy levels, we have to
|
|
108
|
+
|
|
109
|
+
#: When propagating classifications down through taxonomy levels, we have to
|
|
110
110
|
#: decide whether we prefer more frequent categories or more specific categories.
|
|
111
111
|
#: taxonomy_propagation_level_weight and taxonomy_propagation_count_weight
|
|
112
112
|
#: balance levels against counts in this process.
|
|
113
113
|
#:
|
|
114
114
|
#: With a very low default value, this just breaks ties.
|
|
115
115
|
self.taxonomy_propagation_count_weight = 0.01
|
|
116
|
-
|
|
116
|
+
|
|
117
117
|
#: Should we record information about the state of labels prior to smoothing?
|
|
118
118
|
self.add_pre_smoothing_description = True
|
|
119
|
-
|
|
119
|
+
|
|
120
120
|
#: When a dict (rather than a file) is passed to either smoothing function,
|
|
121
121
|
#: if this is True, we'll make a copy of the input dict before modifying.
|
|
122
122
|
self.modify_in_place = False
|
|
123
|
-
|
|
123
|
+
|
|
124
124
|
#: Debug options
|
|
125
125
|
self.break_at_image = None
|
|
126
126
|
|
|
@@ -131,31 +131,31 @@ def _results_for_sequence(images_this_sequence,filename_to_results):
|
|
|
131
131
|
"""
|
|
132
132
|
Fetch MD results for every image in this sequence, based on the 'file_name' field
|
|
133
133
|
"""
|
|
134
|
-
|
|
134
|
+
|
|
135
135
|
results_this_sequence = []
|
|
136
136
|
for im in images_this_sequence:
|
|
137
137
|
fn = im['file_name']
|
|
138
138
|
results_this_image = filename_to_results[fn]
|
|
139
139
|
assert isinstance(results_this_image,dict)
|
|
140
140
|
results_this_sequence.append(results_this_image)
|
|
141
|
-
|
|
141
|
+
|
|
142
142
|
return results_this_sequence
|
|
143
|
-
|
|
144
|
-
|
|
143
|
+
|
|
144
|
+
|
|
145
145
|
def _sort_images_by_time(images):
|
|
146
146
|
"""
|
|
147
147
|
Returns a copy of [images], sorted by the 'datetime' field (ascending).
|
|
148
148
|
"""
|
|
149
|
-
return sorted(images, key = lambda im: im['datetime'])
|
|
149
|
+
return sorted(images, key = lambda im: im['datetime'])
|
|
150
150
|
|
|
151
151
|
|
|
152
152
|
def count_detections_by_classification_category(detections,options=None):
|
|
153
153
|
"""
|
|
154
154
|
Count the number of instances of each classification category in the detections list
|
|
155
|
-
[detections] that have an above-threshold detection. Sort results in descending
|
|
155
|
+
[detections] that have an above-threshold detection. Sort results in descending
|
|
156
156
|
order by count. Returns a dict mapping category ID --> count. If no detections
|
|
157
157
|
are above threshold, returns an empty dict.
|
|
158
|
-
|
|
158
|
+
|
|
159
159
|
Only processes the top classification for each detection.
|
|
160
160
|
|
|
161
161
|
Args:
|
|
@@ -165,26 +165,26 @@ def count_detections_by_classification_category(detections,options=None):
|
|
|
165
165
|
Returns:
|
|
166
166
|
dict mapping above-threshold category IDs to counts
|
|
167
167
|
"""
|
|
168
|
-
|
|
168
|
+
|
|
169
169
|
if detections is None or len(detections) == 0:
|
|
170
170
|
return {}
|
|
171
|
-
|
|
171
|
+
|
|
172
172
|
if options is None:
|
|
173
173
|
options = ClassificationSmoothingOptions()
|
|
174
174
|
|
|
175
175
|
category_to_count = defaultdict(int)
|
|
176
|
-
|
|
176
|
+
|
|
177
177
|
for det in detections:
|
|
178
178
|
if ('classifications' in det) and (det['conf'] >= options.detection_confidence_threshold):
|
|
179
179
|
# assert len(det['classifications']) == 1
|
|
180
180
|
c = det['classifications'][0]
|
|
181
181
|
if c[1] >= options.classification_confidence_threshold:
|
|
182
|
-
category_to_count[c[0]] += 1
|
|
183
|
-
|
|
182
|
+
category_to_count[c[0]] += 1
|
|
183
|
+
|
|
184
184
|
category_to_count = {k: v for k, v in sorted(category_to_count.items(),
|
|
185
|
-
key=lambda item: item[1],
|
|
185
|
+
key=lambda item: item[1],
|
|
186
186
|
reverse=True)}
|
|
187
|
-
|
|
187
|
+
|
|
188
188
|
return category_to_count
|
|
189
189
|
|
|
190
190
|
|
|
@@ -199,7 +199,7 @@ def get_classification_description_string(category_to_count,classification_descr
|
|
|
199
199
|
Returns:
|
|
200
200
|
string: a description of this image's content, e.g. "rabbit (4), human (1)"
|
|
201
201
|
"""
|
|
202
|
-
|
|
202
|
+
|
|
203
203
|
category_strings = []
|
|
204
204
|
# category_id = next(iter(category_to_count))
|
|
205
205
|
for category_id in category_to_count:
|
|
@@ -212,29 +212,29 @@ def get_classification_description_string(category_to_count,classification_descr
|
|
|
212
212
|
count = category_to_count[category_id]
|
|
213
213
|
category_string = '{} ({})'.format(category_name,count)
|
|
214
214
|
category_strings.append(category_string)
|
|
215
|
-
|
|
215
|
+
|
|
216
216
|
return ', '.join(category_strings)
|
|
217
|
-
|
|
217
|
+
|
|
218
218
|
|
|
219
219
|
def _print_counts_with_names(category_to_count,classification_descriptions):
|
|
220
220
|
"""
|
|
221
221
|
Print a list of classification categories with counts, based in the name --> count
|
|
222
222
|
dict [category_to_count]
|
|
223
223
|
"""
|
|
224
|
-
|
|
224
|
+
|
|
225
225
|
for category_id in category_to_count:
|
|
226
226
|
category_name = classification_descriptions[category_id]
|
|
227
227
|
count = category_to_count[category_id]
|
|
228
228
|
print('{}: {} ({})'.format(category_id,category_name,count))
|
|
229
|
-
|
|
230
|
-
|
|
229
|
+
|
|
230
|
+
|
|
231
231
|
def _prepare_results_for_smoothing(input_file,options):
|
|
232
232
|
"""
|
|
233
|
-
Load results from [input_file] if necessary, prepare category descriptions
|
|
233
|
+
Load results from [input_file] if necessary, prepare category descriptions
|
|
234
234
|
for smoothing. Adds pre-smoothing descriptions to every image if the options
|
|
235
235
|
say we're supposed to do that.
|
|
236
236
|
"""
|
|
237
|
-
|
|
237
|
+
|
|
238
238
|
if isinstance(input_file,str):
|
|
239
239
|
with open(input_file,'r') as f:
|
|
240
240
|
print('Loading results from:\n{}'.format(input_file))
|
|
@@ -249,71 +249,71 @@ def _prepare_results_for_smoothing(input_file,options):
|
|
|
249
249
|
|
|
250
250
|
|
|
251
251
|
## Category processing
|
|
252
|
-
|
|
252
|
+
|
|
253
253
|
category_name_to_id = {d['classification_categories'][k]:k for k in d['classification_categories']}
|
|
254
254
|
other_category_ids = []
|
|
255
255
|
for s in options.other_category_names:
|
|
256
256
|
if s in category_name_to_id:
|
|
257
257
|
other_category_ids.append(category_name_to_id[s])
|
|
258
|
-
|
|
258
|
+
|
|
259
259
|
# Before we do anything else, get rid of everything but the top classification
|
|
260
260
|
# for each detection, and remove the 'classifications' field from detections with
|
|
261
261
|
# no classifications.
|
|
262
262
|
for im in tqdm(d['images']):
|
|
263
|
-
|
|
263
|
+
|
|
264
264
|
if 'detections' not in im or im['detections'] is None or len(im['detections']) == 0:
|
|
265
265
|
continue
|
|
266
|
-
|
|
266
|
+
|
|
267
267
|
detections = im['detections']
|
|
268
|
-
|
|
268
|
+
|
|
269
269
|
for det in detections:
|
|
270
|
-
|
|
270
|
+
|
|
271
271
|
if 'classifications' not in det:
|
|
272
272
|
continue
|
|
273
273
|
if len(det['classifications']) == 0:
|
|
274
274
|
del det['classifications']
|
|
275
275
|
continue
|
|
276
|
-
|
|
276
|
+
|
|
277
277
|
classification_confidence_values = [c[1] for c in det['classifications']]
|
|
278
278
|
assert is_list_sorted(classification_confidence_values,reverse=True)
|
|
279
279
|
det['classifications'] = [det['classifications'][0]]
|
|
280
|
-
|
|
280
|
+
|
|
281
281
|
# ...for each detection in this image
|
|
282
|
-
|
|
282
|
+
|
|
283
283
|
# ...for each image
|
|
284
|
-
|
|
285
|
-
|
|
284
|
+
|
|
285
|
+
|
|
286
286
|
## Clean up classification descriptions so we can test taxonomic relationships
|
|
287
287
|
## by substring testing.
|
|
288
|
-
|
|
288
|
+
|
|
289
289
|
classification_descriptions_clean = None
|
|
290
290
|
classification_descriptions = None
|
|
291
|
-
|
|
291
|
+
|
|
292
292
|
if 'classification_category_descriptions' in d:
|
|
293
293
|
classification_descriptions = d['classification_category_descriptions']
|
|
294
294
|
classification_descriptions_clean = {}
|
|
295
295
|
# category_id = next(iter(classification_descriptions))
|
|
296
|
-
for category_id in classification_descriptions:
|
|
296
|
+
for category_id in classification_descriptions:
|
|
297
297
|
classification_descriptions_clean[category_id] = \
|
|
298
298
|
clean_taxonomy_string(classification_descriptions[category_id]).strip(';').lower()
|
|
299
|
-
|
|
300
|
-
|
|
299
|
+
|
|
300
|
+
|
|
301
301
|
## Optionally add pre-smoothing descriptions to every image
|
|
302
|
-
|
|
303
|
-
if options.add_pre_smoothing_description:
|
|
304
|
-
|
|
302
|
+
|
|
303
|
+
if options.add_pre_smoothing_description and (classification_descriptions is not None):
|
|
304
|
+
|
|
305
305
|
for im in tqdm(d['images']):
|
|
306
|
-
|
|
306
|
+
|
|
307
307
|
if 'detections' not in im or im['detections'] is None or len(im['detections']) == 0:
|
|
308
308
|
continue
|
|
309
|
-
|
|
310
|
-
detections = im['detections']
|
|
309
|
+
|
|
310
|
+
detections = im['detections']
|
|
311
311
|
category_to_count = count_detections_by_classification_category(detections, options)
|
|
312
|
-
|
|
312
|
+
|
|
313
313
|
im['pre_smoothing_description'] = \
|
|
314
314
|
get_classification_description_string(category_to_count, classification_descriptions)
|
|
315
|
-
|
|
316
|
-
|
|
315
|
+
|
|
316
|
+
|
|
317
317
|
return {
|
|
318
318
|
'd':d,
|
|
319
319
|
'other_category_ids':other_category_ids,
|
|
@@ -321,7 +321,7 @@ def _prepare_results_for_smoothing(input_file,options):
|
|
|
321
321
|
'classification_descriptions':classification_descriptions
|
|
322
322
|
}
|
|
323
323
|
|
|
324
|
-
# ...def _prepare_results_for_smoothing(...)
|
|
324
|
+
# ...def _prepare_results_for_smoothing(...)
|
|
325
325
|
|
|
326
326
|
|
|
327
327
|
def _smooth_classifications_for_list_of_detections(detections,
|
|
@@ -332,142 +332,142 @@ def _smooth_classifications_for_list_of_detections(detections,
|
|
|
332
332
|
"""
|
|
333
333
|
Smooth classifications for a list of detections, which may have come from a single
|
|
334
334
|
image, or may represent an entire sequence.
|
|
335
|
-
|
|
335
|
+
|
|
336
336
|
Returns None if no changes are made, else a dict.
|
|
337
|
-
|
|
338
|
-
classification_descriptions_clean should be semicolon-delimited taxonomic strings
|
|
337
|
+
|
|
338
|
+
classification_descriptions_clean should be semicolon-delimited taxonomic strings
|
|
339
339
|
from which common names and GUIDs have already been removed.
|
|
340
|
-
|
|
340
|
+
|
|
341
341
|
Assumes there is only one classification per detection, i.e. that non-top classifications
|
|
342
|
-
have already been remoevd.
|
|
342
|
+
have already been remoevd.
|
|
343
343
|
"""
|
|
344
|
-
|
|
344
|
+
|
|
345
345
|
## Count the number of instances of each category in this image
|
|
346
|
-
|
|
346
|
+
|
|
347
347
|
category_to_count = count_detections_by_classification_category(detections, options)
|
|
348
348
|
# _print_counts_with_names(category_to_count,classification_descriptions)
|
|
349
349
|
# get_classification_description_string(category_to_count, classification_descriptions)
|
|
350
|
-
|
|
350
|
+
|
|
351
351
|
if len(category_to_count) <= 1:
|
|
352
352
|
return None
|
|
353
|
-
|
|
353
|
+
|
|
354
354
|
keys = list(category_to_count.keys())
|
|
355
|
-
|
|
356
|
-
# Handle a quirky special case: if the most common category is "other" and
|
|
355
|
+
|
|
356
|
+
# Handle a quirky special case: if the most common category is "other" and
|
|
357
357
|
# it's "tied" with the second-most-common category, swap them
|
|
358
358
|
if (len(keys) > 1) and \
|
|
359
359
|
(keys[0] in other_category_ids) and \
|
|
360
360
|
(keys[1] not in other_category_ids) and \
|
|
361
361
|
(category_to_count[keys[0]] == category_to_count[keys[1]]):
|
|
362
362
|
keys[1], keys[0] = keys[0], keys[1]
|
|
363
|
-
|
|
364
|
-
max_count = category_to_count[keys[0]]
|
|
363
|
+
|
|
364
|
+
max_count = category_to_count[keys[0]]
|
|
365
365
|
most_common_category = keys[0]
|
|
366
366
|
del keys
|
|
367
|
-
|
|
368
|
-
|
|
367
|
+
|
|
368
|
+
|
|
369
369
|
## Debug tools
|
|
370
|
-
|
|
370
|
+
|
|
371
371
|
verbose_debug_enabled = False
|
|
372
|
-
|
|
372
|
+
|
|
373
373
|
if options.break_at_image is not None:
|
|
374
374
|
for det in detections:
|
|
375
375
|
if 'image_filename' in det and \
|
|
376
376
|
det['image_filename'] == options.break_at_image:
|
|
377
377
|
verbose_debug_enabled = True
|
|
378
378
|
break
|
|
379
|
-
|
|
379
|
+
|
|
380
380
|
if verbose_debug_enabled:
|
|
381
381
|
_print_counts_with_names(category_to_count,classification_descriptions)
|
|
382
382
|
from IPython import embed; embed()
|
|
383
|
-
|
|
384
|
-
|
|
383
|
+
|
|
384
|
+
|
|
385
385
|
## Possibly change "other" classifications to the most common category
|
|
386
|
-
|
|
386
|
+
|
|
387
387
|
# ...if the dominant category is not an "other" category.
|
|
388
|
-
|
|
388
|
+
|
|
389
389
|
n_other_classifications_changed_this_image = 0
|
|
390
|
-
|
|
390
|
+
|
|
391
391
|
# If we have at least *min_detections_to_overwrite_other* in a category that isn't
|
|
392
392
|
# "other", change all "other" classifications to that category
|
|
393
393
|
if (max_count >= options.min_detections_to_overwrite_other) and \
|
|
394
394
|
(most_common_category not in other_category_ids):
|
|
395
|
-
|
|
395
|
+
|
|
396
396
|
for det in detections:
|
|
397
|
-
|
|
397
|
+
|
|
398
398
|
if ('classifications' not in det) or \
|
|
399
399
|
(det['conf'] < options.detection_confidence_threshold):
|
|
400
400
|
continue
|
|
401
|
-
|
|
401
|
+
|
|
402
402
|
assert len(det['classifications']) == 1
|
|
403
403
|
c = det['classifications'][0]
|
|
404
|
-
|
|
404
|
+
|
|
405
405
|
if (c[1] >= options.classification_confidence_threshold) and \
|
|
406
406
|
(c[0] in other_category_ids):
|
|
407
|
-
|
|
407
|
+
|
|
408
408
|
if verbose_debug_enabled:
|
|
409
409
|
print('Replacing {} with {}'.format(
|
|
410
410
|
classification_descriptions[c[0]],
|
|
411
411
|
classification_descriptions[c[1]]))
|
|
412
|
-
|
|
412
|
+
|
|
413
413
|
n_other_classifications_changed_this_image += 1
|
|
414
414
|
c[0] = most_common_category
|
|
415
|
-
|
|
415
|
+
|
|
416
416
|
# ...if there are classifications for this detection
|
|
417
|
-
|
|
417
|
+
|
|
418
418
|
# ...for each detection
|
|
419
|
-
|
|
419
|
+
|
|
420
420
|
# ...if we should overwrite all "other" classifications
|
|
421
421
|
|
|
422
422
|
if verbose_debug_enabled:
|
|
423
423
|
print('Made {} other changes'.format(n_other_classifications_changed_this_image))
|
|
424
|
-
|
|
425
|
-
|
|
424
|
+
|
|
425
|
+
|
|
426
426
|
## Re-count
|
|
427
|
-
|
|
427
|
+
|
|
428
428
|
category_to_count = count_detections_by_classification_category(detections, options)
|
|
429
|
-
# _print_counts_with_names(category_to_count,classification_descriptions)
|
|
429
|
+
# _print_counts_with_names(category_to_count,classification_descriptions)
|
|
430
430
|
keys = list(category_to_count.keys())
|
|
431
|
-
max_count = category_to_count[keys[0]]
|
|
431
|
+
max_count = category_to_count[keys[0]]
|
|
432
432
|
most_common_category = keys[0]
|
|
433
433
|
del keys
|
|
434
|
-
|
|
435
|
-
|
|
434
|
+
|
|
435
|
+
|
|
436
436
|
## Possibly change some non-dominant classifications to the dominant category
|
|
437
|
-
|
|
437
|
+
|
|
438
438
|
process_taxonomic_rules = \
|
|
439
439
|
(classification_descriptions_clean is not None) and \
|
|
440
440
|
(len(classification_descriptions_clean) > 0) and \
|
|
441
441
|
(len(category_to_count) > 1)
|
|
442
|
-
|
|
442
|
+
|
|
443
443
|
n_detections_flipped_this_image = 0
|
|
444
|
-
|
|
445
|
-
# Don't do this if the most common category is an "other" category, or
|
|
444
|
+
|
|
445
|
+
# Don't do this if the most common category is an "other" category, or
|
|
446
446
|
# if we don't have enough of the most common category
|
|
447
447
|
if (most_common_category not in other_category_ids) and \
|
|
448
448
|
(max_count >= options.min_detections_to_overwrite_secondary):
|
|
449
|
-
|
|
449
|
+
|
|
450
450
|
# i_det = 0; det = detections[i_det]
|
|
451
451
|
for i_det,det in enumerate(detections):
|
|
452
|
-
|
|
452
|
+
|
|
453
453
|
if ('classifications' not in det) or \
|
|
454
454
|
(det['conf'] < options.detection_confidence_threshold):
|
|
455
455
|
continue
|
|
456
|
-
|
|
456
|
+
|
|
457
457
|
assert len(det['classifications']) == 1
|
|
458
458
|
c = det['classifications'][0]
|
|
459
|
-
|
|
459
|
+
|
|
460
460
|
# Don't over-write the most common category with itself
|
|
461
461
|
if c[0] == most_common_category:
|
|
462
462
|
continue
|
|
463
|
-
|
|
463
|
+
|
|
464
464
|
# Don't bother with below-threshold classifications
|
|
465
465
|
if c[1] < options.classification_confidence_threshold:
|
|
466
466
|
continue
|
|
467
|
-
|
|
467
|
+
|
|
468
468
|
# If we're doing taxonomic processing, at this stage, don't turn children
|
|
469
469
|
# into parents; we'll likely turn parents into children in the next stage.
|
|
470
|
-
|
|
470
|
+
|
|
471
471
|
if process_taxonomic_rules:
|
|
472
472
|
|
|
473
473
|
most_common_category_description = \
|
|
@@ -475,180 +475,180 @@ def _smooth_classifications_for_list_of_detections(detections,
|
|
|
475
475
|
|
|
476
476
|
category_id_this_classification = c[0]
|
|
477
477
|
assert category_id_this_classification in category_to_count
|
|
478
|
-
|
|
478
|
+
|
|
479
479
|
category_description_this_classification = \
|
|
480
480
|
classification_descriptions_clean[category_id_this_classification]
|
|
481
|
-
|
|
482
|
-
# An empty description corresponds to the "animal" category. We don't handle
|
|
483
|
-
# "animal" here as a parent category, that would be handled in the "other smoothing"
|
|
481
|
+
|
|
482
|
+
# An empty description corresponds to the "animal" category. We don't handle
|
|
483
|
+
# "animal" here as a parent category, that would be handled in the "other smoothing"
|
|
484
484
|
# step above.
|
|
485
485
|
if len(category_description_this_classification) == 0:
|
|
486
486
|
continue
|
|
487
|
-
|
|
487
|
+
|
|
488
488
|
most_common_category_is_parent_of_this_category = \
|
|
489
489
|
most_common_category_description in category_description_this_classification
|
|
490
|
-
|
|
490
|
+
|
|
491
491
|
if most_common_category_is_parent_of_this_category:
|
|
492
492
|
continue
|
|
493
|
-
|
|
493
|
+
|
|
494
494
|
# If we have fewer of this category than the most common category,
|
|
495
495
|
# but not *too* many, flip it to the most common category.
|
|
496
496
|
if (max_count > category_to_count[c[0]]) and \
|
|
497
497
|
(category_to_count[c[0]] <= options.max_detections_nondominant_class):
|
|
498
|
-
|
|
498
|
+
|
|
499
499
|
c[0] = most_common_category
|
|
500
|
-
n_detections_flipped_this_image += 1
|
|
501
|
-
|
|
500
|
+
n_detections_flipped_this_image += 1
|
|
501
|
+
|
|
502
502
|
# ...for each detection
|
|
503
503
|
|
|
504
|
-
# ...if the dominant category is legit
|
|
505
|
-
|
|
504
|
+
# ...if the dominant category is legit
|
|
505
|
+
|
|
506
506
|
if verbose_debug_enabled:
|
|
507
507
|
print('Made {} non-dominant --> dominant changes'.format(
|
|
508
508
|
n_detections_flipped_this_image))
|
|
509
509
|
|
|
510
|
-
|
|
510
|
+
|
|
511
511
|
## Re-count
|
|
512
|
-
|
|
512
|
+
|
|
513
513
|
category_to_count = count_detections_by_classification_category(detections, options)
|
|
514
|
-
# _print_counts_with_names(category_to_count,classification_descriptions)
|
|
514
|
+
# _print_counts_with_names(category_to_count,classification_descriptions)
|
|
515
515
|
keys = list(category_to_count.keys())
|
|
516
|
-
max_count = category_to_count[keys[0]]
|
|
516
|
+
max_count = category_to_count[keys[0]]
|
|
517
517
|
most_common_category = keys[0]
|
|
518
518
|
del keys
|
|
519
|
-
|
|
520
|
-
|
|
519
|
+
|
|
520
|
+
|
|
521
521
|
## Possibly collapse higher-level taxonomic predictions down to lower levels
|
|
522
|
-
|
|
522
|
+
|
|
523
523
|
n_taxonomic_changes_this_image = 0
|
|
524
|
-
|
|
524
|
+
|
|
525
525
|
process_taxonomic_rules = \
|
|
526
526
|
(classification_descriptions_clean is not None) and \
|
|
527
527
|
(len(classification_descriptions_clean) > 0) and \
|
|
528
528
|
(len(category_to_count) > 1)
|
|
529
|
-
|
|
529
|
+
|
|
530
530
|
if process_taxonomic_rules and options.propagate_classifications_through_taxonomy:
|
|
531
|
-
|
|
531
|
+
|
|
532
532
|
# det = detections[3]
|
|
533
533
|
for det in detections:
|
|
534
|
-
|
|
534
|
+
|
|
535
535
|
if ('classifications' not in det) or \
|
|
536
536
|
(det['conf'] < options.detection_confidence_threshold):
|
|
537
537
|
continue
|
|
538
|
-
|
|
538
|
+
|
|
539
539
|
assert len(det['classifications']) == 1
|
|
540
540
|
c = det['classifications'][0]
|
|
541
|
-
|
|
541
|
+
|
|
542
542
|
# Don't bother with any classifications below the confidence threshold
|
|
543
543
|
if c[1] < options.classification_confidence_threshold:
|
|
544
544
|
continue
|
|
545
545
|
|
|
546
546
|
category_id_this_classification = c[0]
|
|
547
547
|
assert category_id_this_classification in category_to_count
|
|
548
|
-
|
|
548
|
+
|
|
549
549
|
category_description_this_classification = \
|
|
550
550
|
classification_descriptions_clean[category_id_this_classification]
|
|
551
|
-
|
|
552
|
-
# An empty description corresponds to the "animal" category. We don't handle
|
|
553
|
-
# "animal" here as a parent category, that would be handled in the "other smoothing"
|
|
551
|
+
|
|
552
|
+
# An empty description corresponds to the "animal" category. We don't handle
|
|
553
|
+
# "animal" here as a parent category, that would be handled in the "other smoothing"
|
|
554
554
|
# step above.
|
|
555
555
|
if len(category_description_this_classification) == 0:
|
|
556
556
|
continue
|
|
557
|
-
|
|
557
|
+
|
|
558
558
|
# We may have multiple child categories to choose from; this keeps track of
|
|
559
559
|
# the "best" we've seen so far. "Best" is based on the level (species is better
|
|
560
560
|
# than genus) and number.
|
|
561
561
|
child_category_to_score = defaultdict(float)
|
|
562
|
-
|
|
562
|
+
|
|
563
563
|
for category_id_of_candidate_child in category_to_count.keys():
|
|
564
|
-
|
|
564
|
+
|
|
565
565
|
# A category is never its own child
|
|
566
566
|
if category_id_of_candidate_child == category_id_this_classification:
|
|
567
567
|
continue
|
|
568
|
-
|
|
568
|
+
|
|
569
569
|
# Is this candidate a child of the current classification?
|
|
570
570
|
category_description_candidate_child = \
|
|
571
571
|
classification_descriptions_clean[category_id_of_candidate_child]
|
|
572
|
-
|
|
572
|
+
|
|
573
573
|
# An empty description corresponds to "animal", which can never
|
|
574
574
|
# be a child of another category.
|
|
575
575
|
if len(category_description_candidate_child) == 0:
|
|
576
576
|
continue
|
|
577
|
-
|
|
578
|
-
# As long as we're using "clean" descriptions, parent/child taxonomic
|
|
577
|
+
|
|
578
|
+
# As long as we're using "clean" descriptions, parent/child taxonomic
|
|
579
579
|
# relationships are defined by a substring relationship
|
|
580
580
|
is_child = category_description_this_classification in \
|
|
581
581
|
category_description_candidate_child
|
|
582
582
|
if not is_child:
|
|
583
583
|
continue
|
|
584
|
-
|
|
584
|
+
|
|
585
585
|
# How many instances of this child category are there?
|
|
586
586
|
child_category_count = category_to_count[category_id_of_candidate_child]
|
|
587
|
-
|
|
587
|
+
|
|
588
588
|
# What taxonomy level is this child category defined at?
|
|
589
589
|
child_category_level = taxonomy_level_index(
|
|
590
590
|
classification_descriptions[category_id_of_candidate_child])
|
|
591
|
-
|
|
591
|
+
|
|
592
592
|
child_category_to_score[category_id_of_candidate_child] = \
|
|
593
593
|
child_category_level * options.taxonomy_propagation_level_weight + \
|
|
594
594
|
child_category_count * options.taxonomy_propagation_count_weight
|
|
595
|
-
|
|
595
|
+
|
|
596
596
|
# ...for each category we are considering reducing this classification to
|
|
597
|
-
|
|
597
|
+
|
|
598
598
|
# Did we find a category we want to change this classification to?
|
|
599
599
|
if len(child_category_to_score) > 0:
|
|
600
|
-
|
|
600
|
+
|
|
601
601
|
# Find the child category with the highest score
|
|
602
602
|
child_category_to_score = sort_dictionary_by_value(
|
|
603
603
|
child_category_to_score,reverse=True)
|
|
604
604
|
best_child_category = next(iter(child_category_to_score.keys()))
|
|
605
|
-
|
|
605
|
+
|
|
606
606
|
if verbose_debug_enabled:
|
|
607
607
|
old_category_name = \
|
|
608
608
|
classification_descriptions_clean[c[0]]
|
|
609
609
|
new_category_name = \
|
|
610
610
|
classification_descriptions_clean[best_child_category]
|
|
611
611
|
print('Replacing {} with {}'.format(
|
|
612
|
-
old_category_name,new_category_name))
|
|
613
|
-
|
|
612
|
+
old_category_name,new_category_name))
|
|
613
|
+
|
|
614
614
|
c[0] = best_child_category
|
|
615
|
-
n_taxonomic_changes_this_image += 1
|
|
616
|
-
|
|
615
|
+
n_taxonomic_changes_this_image += 1
|
|
616
|
+
|
|
617
617
|
# ...for each detection
|
|
618
|
-
|
|
619
|
-
# ...if we have taxonomic information available
|
|
620
|
-
|
|
621
|
-
|
|
618
|
+
|
|
619
|
+
# ...if we have taxonomic information available
|
|
620
|
+
|
|
621
|
+
|
|
622
622
|
## Re-count
|
|
623
|
-
|
|
623
|
+
|
|
624
624
|
category_to_count = count_detections_by_classification_category(detections, options)
|
|
625
|
-
# _print_counts_with_names(category_to_count,classification_descriptions)
|
|
625
|
+
# _print_counts_with_names(category_to_count,classification_descriptions)
|
|
626
626
|
keys = list(category_to_count.keys())
|
|
627
|
-
max_count = category_to_count[keys[0]]
|
|
627
|
+
max_count = category_to_count[keys[0]]
|
|
628
628
|
most_common_category = keys[0]
|
|
629
629
|
del keys
|
|
630
|
-
|
|
631
|
-
|
|
630
|
+
|
|
631
|
+
|
|
632
632
|
## Possibly do within-family smoothing
|
|
633
|
-
|
|
633
|
+
|
|
634
634
|
n_within_family_smoothing_changes = 0
|
|
635
|
-
|
|
635
|
+
|
|
636
636
|
# min_detections_to_overwrite_secondary_same_family = -1
|
|
637
637
|
# max_detections_nondominant_class_same_family = 1
|
|
638
638
|
family_level = taxonomy_level_string_to_index('family')
|
|
639
|
-
|
|
639
|
+
|
|
640
640
|
if process_taxonomic_rules:
|
|
641
|
-
|
|
641
|
+
|
|
642
642
|
category_description_most_common_category = \
|
|
643
643
|
classification_descriptions[most_common_category]
|
|
644
644
|
most_common_category_taxonomic_level = \
|
|
645
|
-
taxonomy_level_index(category_description_most_common_category)
|
|
645
|
+
taxonomy_level_index(category_description_most_common_category)
|
|
646
646
|
n_most_common_category = category_to_count[most_common_category]
|
|
647
647
|
tokens = category_description_most_common_category.split(';')
|
|
648
648
|
assert len(tokens) == 7
|
|
649
649
|
most_common_category_family = tokens[3]
|
|
650
650
|
most_common_category_genus = tokens[4]
|
|
651
|
-
|
|
651
|
+
|
|
652
652
|
# Only consider remapping to genus or species level, and only when we have
|
|
653
653
|
# a high enough count in the most common category
|
|
654
654
|
if process_taxonomic_rules and \
|
|
@@ -656,36 +656,36 @@ def _smooth_classifications_for_list_of_detections(detections,
|
|
|
656
656
|
(most_common_category not in other_category_ids) and \
|
|
657
657
|
(most_common_category_taxonomic_level > family_level) and \
|
|
658
658
|
(n_most_common_category >= options.min_detections_to_overwrite_secondary_same_family):
|
|
659
|
-
|
|
659
|
+
|
|
660
660
|
# det = detections[0]
|
|
661
661
|
for det in detections:
|
|
662
|
-
|
|
662
|
+
|
|
663
663
|
if ('classifications' not in det) or \
|
|
664
664
|
(det['conf'] < options.detection_confidence_threshold):
|
|
665
665
|
continue
|
|
666
|
-
|
|
666
|
+
|
|
667
667
|
assert len(det['classifications']) == 1
|
|
668
668
|
c = det['classifications'][0]
|
|
669
|
-
|
|
669
|
+
|
|
670
670
|
# Don't over-write the most common category with itself
|
|
671
671
|
if c[0] == most_common_category:
|
|
672
672
|
continue
|
|
673
|
-
|
|
673
|
+
|
|
674
674
|
# Don't bother with below-threshold classifications
|
|
675
675
|
if c[1] < options.classification_confidence_threshold:
|
|
676
|
-
continue
|
|
677
|
-
|
|
676
|
+
continue
|
|
677
|
+
|
|
678
678
|
n_candidate_flip_category = category_to_count[c[0]]
|
|
679
|
-
|
|
679
|
+
|
|
680
680
|
# Do we have too many of the non-dominant category to do this kind of swap?
|
|
681
681
|
if n_candidate_flip_category > \
|
|
682
682
|
options.max_detections_nondominant_class_same_family:
|
|
683
683
|
continue
|
|
684
684
|
|
|
685
|
-
# Don't flip classes when it's a tie
|
|
685
|
+
# Don't flip classes when it's a tie
|
|
686
686
|
if n_candidate_flip_category == n_most_common_category:
|
|
687
687
|
continue
|
|
688
|
-
|
|
688
|
+
|
|
689
689
|
category_description_candidate_flip = \
|
|
690
690
|
classification_descriptions[c[0]]
|
|
691
691
|
tokens = category_description_candidate_flip.split(';')
|
|
@@ -693,34 +693,34 @@ def _smooth_classifications_for_list_of_detections(detections,
|
|
|
693
693
|
candidate_flip_category_family = tokens[3]
|
|
694
694
|
candidate_flip_category_genus = tokens[4]
|
|
695
695
|
candidate_flip_category_taxonomic_level = \
|
|
696
|
-
taxonomy_level_index(category_description_candidate_flip)
|
|
697
|
-
|
|
696
|
+
taxonomy_level_index(category_description_candidate_flip)
|
|
697
|
+
|
|
698
698
|
# Only proceed if we have valid family strings
|
|
699
699
|
if (len(candidate_flip_category_family) == 0) or \
|
|
700
700
|
(len(most_common_category_family) == 0):
|
|
701
701
|
continue
|
|
702
|
-
|
|
703
|
-
# Only proceed if the candidate and the most common category are in the same family
|
|
702
|
+
|
|
703
|
+
# Only proceed if the candidate and the most common category are in the same family
|
|
704
704
|
if candidate_flip_category_family != most_common_category_family:
|
|
705
705
|
continue
|
|
706
|
-
|
|
706
|
+
|
|
707
707
|
# Don't flip from a species to the genus level in the same genus
|
|
708
708
|
if (candidate_flip_category_genus == most_common_category_genus) and \
|
|
709
709
|
(candidate_flip_category_taxonomic_level > \
|
|
710
710
|
most_common_category_taxonomic_level):
|
|
711
711
|
continue
|
|
712
|
-
|
|
712
|
+
|
|
713
713
|
old_category_name = classification_descriptions_clean[c[0]]
|
|
714
714
|
new_category_name = classification_descriptions_clean[most_common_category]
|
|
715
|
-
|
|
715
|
+
|
|
716
716
|
c[0] = most_common_category
|
|
717
|
-
n_within_family_smoothing_changes += 1
|
|
718
|
-
|
|
717
|
+
n_within_family_smoothing_changes += 1
|
|
718
|
+
|
|
719
719
|
# ...for each detection
|
|
720
|
-
|
|
720
|
+
|
|
721
721
|
# ...if the dominant category is legit and we have taxonomic information available
|
|
722
|
-
|
|
723
|
-
|
|
722
|
+
|
|
723
|
+
|
|
724
724
|
return {'n_other_classifications_changed_this_image':n_other_classifications_changed_this_image,
|
|
725
725
|
'n_detections_flipped_this_image':n_detections_flipped_this_image,
|
|
726
726
|
'n_taxonomic_changes_this_image':n_taxonomic_changes_this_image,
|
|
@@ -737,33 +737,33 @@ def _smooth_single_image(im,
|
|
|
737
737
|
"""
|
|
738
738
|
Smooth classifications for a single image. Returns None if no changes are made,
|
|
739
739
|
else a dict.
|
|
740
|
-
|
|
741
|
-
classification_descriptions_clean should be semicolon-delimited taxonomic strings
|
|
740
|
+
|
|
741
|
+
classification_descriptions_clean should be semicolon-delimited taxonomic strings
|
|
742
742
|
from which common names and GUIDs have already been removed.
|
|
743
|
-
|
|
743
|
+
|
|
744
744
|
Assumes there is only one classification per detection, i.e. that non-top classifications
|
|
745
745
|
have already been remoevd.
|
|
746
746
|
"""
|
|
747
|
-
|
|
747
|
+
|
|
748
748
|
if 'detections' not in im or im['detections'] is None or len(im['detections']) == 0:
|
|
749
749
|
return
|
|
750
|
-
|
|
750
|
+
|
|
751
751
|
detections = im['detections']
|
|
752
|
-
|
|
752
|
+
|
|
753
753
|
# Simplify debugging
|
|
754
754
|
for det in detections:
|
|
755
755
|
det['image_filename'] = im['file']
|
|
756
|
-
|
|
757
|
-
to_return = _smooth_classifications_for_list_of_detections(detections,
|
|
758
|
-
options=options,
|
|
756
|
+
|
|
757
|
+
to_return = _smooth_classifications_for_list_of_detections(detections,
|
|
758
|
+
options=options,
|
|
759
759
|
other_category_ids=other_category_ids,
|
|
760
|
-
classification_descriptions=classification_descriptions,
|
|
760
|
+
classification_descriptions=classification_descriptions,
|
|
761
761
|
classification_descriptions_clean=classification_descriptions_clean)
|
|
762
762
|
|
|
763
763
|
# Clean out debug information
|
|
764
764
|
for det in detections:
|
|
765
765
|
del det['image_filename']
|
|
766
|
-
|
|
766
|
+
|
|
767
767
|
return to_return
|
|
768
768
|
|
|
769
769
|
# ...def smooth_single_image
|
|
@@ -775,104 +775,104 @@ def smooth_classification_results_image_level(input_file,output_file=None,option
|
|
|
775
775
|
"""
|
|
776
776
|
Smooth classifications at the image level for all results in the MD-formatted results
|
|
777
777
|
file [input_file], optionally writing a new set of results to [output_file].
|
|
778
|
-
|
|
779
|
-
This function generally expresses the notion that an image with 700 cows and one deer
|
|
778
|
+
|
|
779
|
+
This function generally expresses the notion that an image with 700 cows and one deer
|
|
780
780
|
is really just 701 cows.
|
|
781
|
-
|
|
781
|
+
|
|
782
782
|
Only count detections with a classification confidence threshold above
|
|
783
783
|
[options.classification_confidence_threshold], which in practice means we're only
|
|
784
784
|
looking at one category per detection.
|
|
785
|
-
|
|
785
|
+
|
|
786
786
|
If an image has at least [options.min_detections_to_overwrite_secondary] such detections
|
|
787
787
|
in the most common category, and no more than [options.max_detections_nondominant_class]
|
|
788
788
|
in the second-most-common category, flip all detections to the most common
|
|
789
789
|
category.
|
|
790
|
-
|
|
791
|
-
Optionally treat some classes as particularly unreliable, typically used to overwrite an
|
|
790
|
+
|
|
791
|
+
Optionally treat some classes as particularly unreliable, typically used to overwrite an
|
|
792
792
|
"other" class.
|
|
793
|
-
|
|
793
|
+
|
|
794
794
|
This function also removes everything but the non-dominant classification for each detection.
|
|
795
|
-
|
|
795
|
+
|
|
796
796
|
Args:
|
|
797
797
|
input_file (str): MegaDetector-formatted classification results file to smooth. Can
|
|
798
798
|
also be an already-loaded results dict.
|
|
799
799
|
output_file (str, optional): .json file to write smoothed results
|
|
800
|
-
options (ClassificationSmoothingOptions, optional): see
|
|
800
|
+
options (ClassificationSmoothingOptions, optional): see
|
|
801
801
|
             ClassificationSmoothingOptions for details.
-
+
     Returns:
         dict: MegaDetector-results-formatted dict, identical to what's written to
         [output_file] if [output_file] is not None.
     """
-
+
     ## Input validation
-
+
     if options is None:
         options = ClassificationSmoothingOptions()
-
+
     r = _prepare_results_for_smoothing(input_file, options)
     d = r['d']
     other_category_ids = r['other_category_ids']
     classification_descriptions_clean = r['classification_descriptions_clean']
     classification_descriptions = r['classification_descriptions']
-
-
+
+
     ## Smoothing
-
+
     n_other_classifications_changed = 0
     n_other_images_changed = 0
     n_taxonomic_images_changed = 0
-
+
     n_detections_flipped = 0
     n_images_changed = 0
-    n_taxonomic_classification_changes = 0
-
-    # im = d['images'][0]
+    n_taxonomic_classification_changes = 0
+
+    # im = d['images'][0]
     for im in tqdm(d['images']):
-
+
         r = _smooth_single_image(im,
                                  options,
                                  other_category_ids,
                                  classification_descriptions=classification_descriptions,
                                  classification_descriptions_clean=classification_descriptions_clean)
-
+
         if r is None:
             continue
-
+
         n_detections_flipped_this_image = r['n_detections_flipped_this_image']
         n_other_classifications_changed_this_image = \
             r['n_other_classifications_changed_this_image']
         n_taxonomic_changes_this_image = r['n_taxonomic_changes_this_image']
-
+
         n_detections_flipped += n_detections_flipped_this_image
         n_other_classifications_changed += n_other_classifications_changed_this_image
         n_taxonomic_classification_changes += n_taxonomic_changes_this_image
-
+
         if n_detections_flipped_this_image > 0:
             n_images_changed += 1
         if n_other_classifications_changed_this_image > 0:
             n_other_images_changed += 1
         if n_taxonomic_changes_this_image > 0:
             n_taxonomic_images_changed += 1
-
-    # ...for each image
-
+
+    # ...for each image
+
     print('Classification smoothing: changed {} detections on {} images'.format(
         n_detections_flipped,n_images_changed))
-
+
     print('"Other" smoothing: changed {} detections on {} images'.format(
         n_other_classifications_changed,n_other_images_changed))
-
+
     print('Taxonomic smoothing: changed {} detections on {} images'.format(
         n_taxonomic_classification_changes,n_taxonomic_images_changed))
-
-
+
+
     ## Write output
-
-    if output_file is not None:
+
+    if output_file is not None:
         print('Writing results after image-level smoothing to:\n{}'.format(output_file))
         with open(output_file,'w') as f:
-            json.dump(d,f,indent=1)
+            json.dump(d,f,indent=1)
 
     return d
 
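Most of the paired -/+ lines in these hunks differ only in trailing whitespace, which does not survive rendering here. For orientation, a minimal usage sketch of the image-level entry point whose tail appears in the hunk above (not part of the diff; the module path and file names are assumptions for illustration, not taken from this release):

# A minimal sketch, assuming the module path below is where this file lives
from megadetector.postprocessing.classification_postprocessing import (
    ClassificationSmoothingOptions,
    smooth_classification_results_image_level)

# Default thresholds; see ClassificationSmoothingOptions for the available knobs
options = ClassificationSmoothingOptions()

# Smooth an MD-formatted classification results file and write a smoothed copy
smoothed = smooth_classification_results_image_level(
    input_file='classification-results.json',            # hypothetical path
    output_file='classification-results-smoothed.json',  # hypothetical path
    options=options)
print('Smoothed results contain {} images'.format(len(smoothed['images'])))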
@@ -880,7 +880,7 @@ def smooth_classification_results_image_level(input_file,output_file=None,option
 
 
 #%% Sequence-level smoothing
-
+
 def smooth_classification_results_sequence_level(input_file,
                                                  cct_sequence_information,
                                                  output_file=None,
@@ -888,10 +888,10 @@ def smooth_classification_results_sequence_level(input_file,
     """
     Smooth classifications at the sequence level for all results in the MD-formatted results
     file [input_file], optionally writing a new set of results to [output_file].
-
+
     This function generally expresses the notion that a sequence that looks like
     deer/deer/deer/elk/deer/deer/deer/deer is really just a deer.
-
+
     Args:
         input_file (str or dict): MegaDetector-formatted classification results file to smooth
             (or already-loaded results). If you supply a dict, it's modified in place by default, but
@@ -899,28 +899,28 @@ def smooth_classification_results_sequence_level(input_file,
         cct_sequence_information (str, dict, or list): COCO Camera Traps file containing sequence IDs for
             each image (or an already-loaded CCT-formatted dict, or just the 'images' list from a CCT dict).
         output_file (str, optional): .json file to write smoothed results
-        options (ClassificationSmoothingOptions, optional): see
+        options (ClassificationSmoothingOptions, optional): see
             ClassificationSmoothingOptions for details.
-
+
     Returns:
         dict: MegaDetector-results-formatted dict, identical to what's written to
         [output_file] if [output_file] is not None.
     """
-
+
     ## Input validation
-
+
     if options is None:
         options = ClassificationSmoothingOptions()
-
+
     r = _prepare_results_for_smoothing(input_file, options)
     d = r['d']
     other_category_ids = r['other_category_ids']
     classification_descriptions_clean = r['classification_descriptions_clean']
     classification_descriptions = r['classification_descriptions']
-
-
+
+
     ## Make a list of images appearing in each sequence
-
+
     if isinstance(cct_sequence_information,list):
         image_info = cct_sequence_information
     elif isinstance(cct_sequence_information,str):
@@ -931,77 +931,77 @@ def smooth_classification_results_sequence_level(input_file,
     else:
         assert isinstance(cct_sequence_information,dict)
         image_info = cct_sequence_information['images']
-
+
     sequence_to_image_filenames = defaultdict(list)
-
+
     # im = image_info[0]
     for im in tqdm(image_info):
-        sequence_to_image_filenames[im['seq_id']].append(im['file_name'])
+        sequence_to_image_filenames[im['seq_id']].append(im['file_name'])
     del image_info
-
+
     image_fn_to_classification_results = {}
     for im in d['images']:
         fn = im['file']
         assert fn not in image_fn_to_classification_results
         image_fn_to_classification_results[fn] = im
-
-
+
+
     ## Smoothing
-
+
     n_other_classifications_changed = 0
     n_other_sequences_changed = 0
     n_taxonomic_sequences_changed = 0
     n_within_family_sequences_changed = 0
-
+
     n_detections_flipped = 0
     n_sequences_changed = 0
-    n_taxonomic_classification_changes = 0
-    n_within_family_changes = 0
-
+    n_taxonomic_classification_changes = 0
+    n_within_family_changes = 0
+
     # sequence_id = list(sequence_to_image_filenames.keys())[0]
     for sequence_id in sequence_to_image_filenames.keys():
 
         image_filenames_this_sequence = sequence_to_image_filenames[sequence_id]
-
+
         # if 'file' in image_filenames_this_sequence:
         #     from IPython import embed; embed()
-
+
         detections_this_sequence = []
         for image_filename in image_filenames_this_sequence:
             im = image_fn_to_classification_results[image_filename]
             if 'detections' not in im or im['detections'] is None:
                 continue
             detections_this_sequence.extend(im['detections'])
-
+
             # Temporarily add image filenames to every detection,
             # for debugging
             for det in im['detections']:
                 det['image_filename'] = im['file']
-
+
         if len(detections_this_sequence) == 0:
             continue
-
+
         r = _smooth_classifications_for_list_of_detections(
-            detections=detections_this_sequence,
-            options=options,
+            detections=detections_this_sequence,
+            options=options,
             other_category_ids=other_category_ids,
-            classification_descriptions=classification_descriptions,
+            classification_descriptions=classification_descriptions,
             classification_descriptions_clean=classification_descriptions_clean)
-
+
         if r is None:
             continue
-
+
         n_detections_flipped_this_sequence = r['n_detections_flipped_this_image']
         n_other_classifications_changed_this_sequence = \
             r['n_other_classifications_changed_this_image']
         n_taxonomic_changes_this_sequence = r['n_taxonomic_changes_this_image']
         n_within_family_changes_this_sequence = r['n_within_family_smoothing_changes']
-
+
         n_detections_flipped += n_detections_flipped_this_sequence
         n_other_classifications_changed += n_other_classifications_changed_this_sequence
         n_taxonomic_classification_changes += n_taxonomic_changes_this_sequence
         n_within_family_changes += n_within_family_changes_this_sequence
-
+
         if n_detections_flipped_this_sequence > 0:
             n_sequences_changed += 1
         if n_other_classifications_changed_this_sequence > 0:
@@ -1010,40 +1010,40 @@ def smooth_classification_results_sequence_level(input_file,
             n_taxonomic_sequences_changed += 1
         if n_within_family_changes_this_sequence > 0:
             n_within_family_sequences_changed += 1
-
+
     # ...for each sequence
-
+
     print('Classification smoothing: changed {} detections in {} sequences'.format(
         n_detections_flipped,n_sequences_changed))
-
+
     print('"Other" smoothing: changed {} detections in {} sequences'.format(
         n_other_classifications_changed,n_other_sequences_changed))
-
+
     print('Taxonomic smoothing: changed {} detections in {} sequences'.format(
         n_taxonomic_classification_changes,n_taxonomic_sequences_changed))
 
     print('Within-family smoothing: changed {} detections in {} sequences'.format(
         n_within_family_changes,n_within_family_sequences_changed))
-
-
+
+
     ## Clean up debug information
-
+
     for im in d['images']:
         if 'detections' not in im or im['detections'] is None:
             continue
         for det in im['detections']:
             if 'image_filename' in det:
                 del det['image_filename']
-
+
 
     ## Write output
-
-    if output_file is not None:
+
+    if output_file is not None:
         print('Writing sequence-smoothed classification results to {}'.format(
-            output_file))
+            output_file))
         with open(output_file,'w') as f:
             json.dump(d,f,indent=1)
-
+
     return d
 
 # ...smooth_classification_results_sequence_level(...)
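A similar hedged sketch for the sequence-level entry point above; cct.json stands in for a COCO Camera Traps file providing 'seq_id' and 'file_name' for each image, which is what the grouping code in this function consumes (module path and file names are assumptions for illustration):

from megadetector.postprocessing.classification_postprocessing import (
    ClassificationSmoothingOptions,
    smooth_classification_results_sequence_level)

# Group detections by sequence and smooth each sequence's classifications
smoothed = smooth_classification_results_sequence_level(
    input_file='classification-results.json',
    cct_sequence_information='cct.json',
    output_file='classification-results-sequence-smoothed.json',
    options=ClassificationSmoothingOptions())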
@@ -1058,14 +1058,14 @@ def restrict_to_taxa_list(taxa_list,
     """
     Given a prediction file in MD .json format, likely without having had
     a geofence applied, apply a custom taxa list.
-
+
     Args:
         taxa_list (str or list): list of latin names, or a text file containing
             a list of latin names. Optionally may contain a second (comma-delimited)
             column containing common names, used only for debugging. Latin names
             must exist in the SpeciesNet taxonomy.
-        speciesnet_taxonomy_file (str): taxonomy filename, in the same format used for
-            model release (with 7-token taxonomy entries)
+        speciesnet_taxonomy_file (str): taxonomy filename, in the same format used for
+            model release (with 7-token taxonomy entries)
         input_file (str): .json file to read, in MD format. This can be None, in which
             case this function just validates [taxa_list].
         output_file (str): .json file to write, in MD format
@@ -1075,21 +1075,21 @@ def restrict_to_taxa_list(taxa_list,
             felid predictions be mapped to that species, as opposed to being mapped
             to the family?
         add_pre_restriction_description (bool, optional): should we add a new metadata
-            field that summarizes each image's classifications prior to taxonomic
+            field that summarizes each image's classifications prior to taxonomic
             restriction?
     """
 
     ##%% Read target taxa list
-
+
     if isinstance(taxa_list,str):
         assert os.path.isfile(taxa_list), \
             'Could not find taxa list file {}'.format(taxa_list)
         with open(taxa_list,'r') as f:
             taxa_list = f.readlines()
-
+
     taxa_list = [s.strip().lower() for s in taxa_list]
     taxa_list = [s for s in taxa_list if len(s) > 0]
-
+
     target_latin_to_common = {}
     for s in taxa_list:
         if s.strip().startswith('#'):
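Per the docstring above, [taxa_list] can also be an in-memory list; a hypothetical example of the one- and two-column line formats the parsing code accepts:

# Each entry is a latin name, optionally followed by a comma-delimited
# common name that is used only for debug output
taxa_list = [
    'odocoileus virginianus,white-tailed deer',
    'canis latrans',
]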
@@ -1105,38 +1105,38 @@ def restrict_to_taxa_list(taxa_list,
             common_name = None
         assert binomial_name not in target_latin_to_common
         target_latin_to_common[binomial_name] = common_name
-
+
 
     ##%% Read taxonomy file
-
+
     with open(speciesnet_taxonomy_file,'r') as f:
         speciesnet_taxonomy_list = f.readlines()
     speciesnet_taxonomy_list = [s.strip() for s in \
         speciesnet_taxonomy_list if len(s.strip()) > 0]
-
+
     # Maps the latin name of every taxon to the corresponding full taxon string
     #
     # For species, the key is a binomial name
     speciesnet_latin_name_to_taxon_string = {}
     speciesnet_common_name_to_taxon_string = {}
-
+
     def _insert_taxonomy_string(s):
-
+
         tokens = s.split(';')
         assert len(tokens) == 7
-
+
         guid = tokens[0] # noqa
         class_name = tokens[1]
         order = tokens[2]
         family = tokens[3]
         genus = tokens[4]
-        species = tokens[5]
+        species = tokens[5]
         common_name = tokens[6]
-
+
         if len(class_name) == 0:
             assert common_name in ('animal','vehicle','blank')
             return
-
+
         if len(species) > 0:
             assert all([len(s) > 0 for s in [genus,family,order]])
             binomial_name = genus + ' ' + species
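For reference, a made-up example of the seven-token, semicolon-delimited entry format that _insert_taxonomy_string parses (GUID;class;order;family;genus;species;common name — the GUID and taxon values below are invented, not taken from the release taxonomy):

entry = 'fake_guid;mammalia;cetartiodactyla;cervidae;odocoileus;virginianus;white-tailed deer'
tokens = entry.split(';')
assert len(tokens) == 7
binomial_name = tokens[4] + ' ' + tokens[5]  # 'odocoileus virginianus'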
@@ -1156,43 +1156,43 @@ def restrict_to_taxa_list(taxa_list,
         else:
             if class_name not in speciesnet_latin_name_to_taxon_string:
                 speciesnet_latin_name_to_taxon_string[class_name] = s
-
+
         if len(common_name) > 0:
             if common_name not in speciesnet_common_name_to_taxon_string:
                 speciesnet_common_name_to_taxon_string[common_name] = s
-
+
     for s in speciesnet_taxonomy_list:
-
+
         _insert_taxonomy_string(s)
-
-
+
+
     ##%% Make sure all parent taxa are represented in the taxonomy
-
+
     # In theory any taxon that appears as the parent of another taxon should
     # also be in the taxonomy, but this isn't always true, so we fix it here.
-
+
     new_taxon_string_to_missing_tokens = defaultdict(list)
-
+
     # latin_name = next(iter(speciesnet_latin_name_to_taxon_string.keys()))
     for latin_name in speciesnet_latin_name_to_taxon_string.keys():
-
+
         if 'no cv result' in latin_name:
             continue
-
+
         taxon_string = speciesnet_latin_name_to_taxon_string[latin_name]
         tokens = taxon_string.split(';')
-
+
         # Don't process GUID, species, or common name
         # i_token = 6
         for i_token in range(1,len(tokens)-2):
-
-            test_token = tokens[i_token]
+
+            test_token = tokens[i_token]
             if len(test_token) == 0:
                 continue
-
+
             # Do we need to make up a taxon for this token?
             if test_token not in speciesnet_latin_name_to_taxon_string:
-
+
                 new_tokens = [''] * 7
                 new_tokens[0] = 'fake_guid'
                 for i_copy_token in range(1,i_token+1):
@@ -1202,28 +1202,28 @@ def restrict_to_taxa_list(taxa_list,
                 new_taxon_string = ';'.join(new_tokens)
                 # assert new_taxon_string not in new_taxon_strings
                 new_taxon_string_to_missing_tokens[new_taxon_string].append(test_token)
-
+
         # ...for each token
-
+
     # ...for each taxon
-
+
     print('Found {} taxa that need to be inserted to make the taxonomy valid:\n'.format(
         len(new_taxon_string_to_missing_tokens)))
-
+
     new_taxon_string_to_missing_tokens = \
         sort_dictionary_by_key(new_taxon_string_to_missing_tokens)
     for taxon_string in new_taxon_string_to_missing_tokens:
         missing_taxa = ','.join(new_taxon_string_to_missing_tokens[taxon_string])
         print('{} ({})'.format(taxon_string,missing_taxa))
-
+
     for new_taxon_string in new_taxon_string_to_missing_tokens:
         _insert_taxonomy_string(new_taxon_string)
-
-
+
+
     ##%% Make sure all species on the allow-list are in the taxonomy
-
+
     n_failed_mappings = 0
-
+
     for target_taxon_latin_name in target_latin_to_common.keys():
         if target_taxon_latin_name not in speciesnet_latin_name_to_taxon_string:
             common_name = target_latin_to_common[target_taxon_latin_name]
@@ -1234,99 +1234,99 @@ def restrict_to_taxa_list(taxa_list,
                 speciesnet_common_name_to_taxon_string[common_name])
             print(s)
             n_failed_mappings += 1
-
+
     if n_failed_mappings > 0:
         raise ValueError('Cannot continue with geofence generation')
-
-
+
+
     ##%% For the allow-list, map each parent taxon to a set of allowable child taxa
-
-    # Maps parent names to all allowed child names, or None if this is the
+
+    # Maps parent names to all allowed child names, or None if this is the
     # lowest-level allowable taxon on this path
     allowed_parent_taxon_to_child_taxa = defaultdict(set)
-
+
     # latin_name = next(iter(target_latin_to_common.keys()))
     for latin_name in target_latin_to_common:
-
+
         taxon_string = speciesnet_latin_name_to_taxon_string[latin_name]
         tokens = taxon_string.split(';')
         assert len(tokens) == 7
-
+
         # Remove GUID and common name
         #
         # This is now always class/order/family/genus/species
         tokens = tokens[1:-1]
-
+
         child_taxon = None
-
+
         # If this is a species
         if len(tokens[-1]) > 0:
             binomial_name = tokens[-2] + ' ' + tokens[-1]
             assert binomial_name == latin_name
             allowed_parent_taxon_to_child_taxa[binomial_name].add(None)
             child_taxon = binomial_name
-
-        # The first candidate parent is the genus
+
+        # The first candidate parent is the genus
         parent_token_index = len(tokens) - 2
 
         while(parent_token_index >= 0):
-
+
             parent_taxon = tokens[parent_token_index]
             allowed_parent_taxon_to_child_taxa[parent_taxon].add(child_taxon)
             child_taxon = parent_taxon
-            parent_token_index -= 1
-
+            parent_token_index -= 1
+
     # ...for each allowed latin name
-
+
     allowed_parent_taxon_to_child_taxa = \
         sort_dictionary_by_key(allowed_parent_taxon_to_child_taxa)
-
-
+
+
     ##%% If we were just validating the custom taxa file, we're done
 
     if input_file is None:
         print('Finished validating custom taxonomy list')
         return
-
+
 
     ##%% Map all predictions that exist in this dataset...
-
+
     # ...to the prediction we should generate.
-
+
     with open(input_file,'r') as f:
         input_data = json.load(f)
-
+
     input_category_id_to_common_name = input_data['classification_categories'] #noqa
     input_category_id_to_taxonomy_string = \
         input_data['classification_category_descriptions']
-
+
     input_category_id_to_output_taxon_string = {}
-
+
     # input_category_id = next(iter(input_category_id_to_taxonomy_string.keys()))
     for input_category_id in input_category_id_to_taxonomy_string.keys():
-
+
         input_taxon_string = input_category_id_to_taxonomy_string[input_category_id]
         input_taxon_tokens = input_taxon_string.split(';')
         assert len(input_taxon_tokens) == 7
-
+
         # Don't mess with blank/no-cv-result/animal/human
         if (input_taxon_string in non_taxonomic_prediction_strings) or \
            (input_taxon_string == human_prediction_string):
             input_category_id_to_output_taxon_string[input_category_id] = \
                 input_taxon_string
             continue
-
+
         # Remove GUID and common name
-
+
         # This is now always class/order/family/genus/species
         input_taxon_tokens = input_taxon_tokens[1:-1]
-
+
         test_index = len(input_taxon_tokens) - 1
         target_taxon = None
-
+
         # Start at the species level, and see whether each taxon is allowed
         while((test_index >= 0) and (target_taxon is None)):
-
+
             # Species are represented as binomial names
             if (test_index == (len(input_taxon_tokens) - 1)) and \
                (len(input_taxon_tokens[-1]) > 0):
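To make the remapping loop in this hunk concrete, a hypothetical trace, assuming the allow-list contains only 'puma concolor':

# allowed_parent_taxon_to_child_taxa would then contain (roughly):
#   'puma concolor' -> {None}            # lowest allowed taxon on this path
#   'puma'          -> {'puma concolor'}
#   'felidae'       -> {'puma'}
#   ...and so on up through the class level
#
# A 'lynx rufus' prediction misses at the species and genus levels, first
# matches at 'felidae', and is remapped to the family-level taxon string
# (or, when allow_walk_down is enabled and the chain of children is unique,
# walked back down toward 'puma concolor'). A prediction with no allowed
# ancestor at any level falls through to the generic 'animal' prediction.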
@@ -1334,27 +1334,27 @@ def restrict_to_taxa_list(taxa_list,
                     input_taxon_tokens[-2] + ' ' + input_taxon_tokens[-1]
             else:
                 test_taxon_name = input_taxon_tokens[test_index]
-
+
             # If we haven't yet found the level at which this taxon is non-empty,
             # keep going up
-            if len(test_taxon_name) == 0:
+            if len(test_taxon_name) == 0:
                 test_index -= 1
                 continue
-
+
             assert test_taxon_name in speciesnet_latin_name_to_taxon_string
-
+
             # Is this taxon allowed according to the custom species list?
             if test_taxon_name in allowed_parent_taxon_to_child_taxa:
-
+
                 allowed_child_taxa = allowed_parent_taxon_to_child_taxa[test_taxon_name]
                 assert allowed_child_taxa is not None
-
-                # If this is the lowest-level allowable token or there is not a
+
+                # If this is the lowest-level allowable token or there is not a
                 # unique child, don't walk any further, even if walking down
                 # is enabled.
                 if (None in allowed_child_taxa):
                     assert len(allowed_child_taxa) == 1
-
+
                 if (None in allowed_child_taxa) or (len(allowed_child_taxa) > 1):
                     target_taxon = test_taxon_name
                 elif not allow_walk_down:
@@ -1370,72 +1370,72 @@ def restrict_to_taxa_list(taxa_list,
                     allowed_child_taxa = \
                         allowed_parent_taxon_to_child_taxa[candidate_taxon]
                     target_taxon = candidate_taxon
-
+
             # ...if this is an allowed taxon
-
+
             test_index -= 1
-
+
         # ...for each token
-
+
         if target_taxon is None:
-            output_taxon_string = animal_prediction_string
+            output_taxon_string = animal_prediction_string
         else:
             output_taxon_string = speciesnet_latin_name_to_taxon_string[target_taxon]
-        input_category_id_to_output_taxon_string[input_category_id] = output_taxon_string
-
+        input_category_id_to_output_taxon_string[input_category_id] = output_taxon_string
+
     # ...for each category
-
-
+
+
     ##%% Build the new tables
-
+
     input_category_id_to_output_category_id = {}
     output_taxon_string_to_category_id = {}
     output_category_id_to_common_name = {}
-
+
     for input_category_id in input_category_id_to_output_taxon_string:
-
+
         original_common_name = \
             input_category_id_to_common_name[input_category_id]
         original_taxon_string = \
             input_category_id_to_taxonomy_string[input_category_id]
         output_taxon_string = \
             input_category_id_to_output_taxon_string[input_category_id]
-
+
         output_common_name = output_taxon_string.split(';')[-1]
-
+
         # Do we need to create a new output category?
         if output_taxon_string not in output_taxon_string_to_category_id:
             output_category_id = str(len(output_taxon_string_to_category_id))
             output_taxon_string_to_category_id[output_taxon_string] = \
                 output_category_id
             output_category_id_to_common_name[output_category_id] = \
-                output_common_name
+                output_common_name
         else:
             output_category_id = \
                 output_taxon_string_to_category_id[output_taxon_string]
-
+
         input_category_id_to_output_category_id[input_category_id] = \
             output_category_id
-
+
         if False:
             print('Mapping {} ({}) to:\n{} ({})\n'.format(
                 original_common_name,original_taxon_string,
                 output_common_name,output_taxon_string))
-        if False:
+        if False:
             print('Mapping {} to {}'.format(
                 original_common_name,output_common_name,))
-
+
     # ...for each category
-
-
+
+
     ##%% Remap all category labels
-
+
     assert len(set(output_taxon_string_to_category_id.keys())) == \
         len(set(output_taxon_string_to_category_id.values()))
-
+
     output_category_id_to_taxon_string = \
         invert_dictionary(output_taxon_string_to_category_id)
-
+
     with open(input_file,'r') as f:
         output_data = json.load(f)
 
@@ -1447,7 +1447,7 @@ def restrict_to_taxa_list(taxa_list,
 
         if 'detections' not in im or im['detections'] is None:
             continue
-
+
         # Possibly prepare a pre-filtering description
         pre_filtering_description = None
         if classification_descriptions is not None and add_pre_filtering_description:
@@ -1462,16 +1462,16 @@ def restrict_to_taxa_list(taxa_list,
                 classification[0] = \
                     input_category_id_to_output_category_id[classification[0]]
 
-    # ...for each image
-
+    # ...for each image
+
     output_data['classification_categories'] = output_category_id_to_common_name
     output_data['classification_category_descriptions'] = \
         output_category_id_to_taxon_string
-
-
+
+
     ##%% Write output
-
+
     with open(output_file,'w') as f:
         json.dump(output_data,f,indent=1)
-
+
 # ...def restrict_to_taxa_list(...)
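Finally, a hedged end-to-end sketch of the function these last hunks modify (module path and file names are assumptions for illustration; the taxonomy file must use the 7-token release format described above):

from megadetector.postprocessing.classification_postprocessing import restrict_to_taxa_list

# Restrict SpeciesNet-style predictions to a custom allow-list of taxa
restrict_to_taxa_list(
    taxa_list='custom_taxa.txt',                      # hypothetical path
    speciesnet_taxonomy_file='taxonomy_release.txt',  # hypothetical path
    input_file='classification-results.json',
    output_file='classification-results-restricted.json')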