megadetector 5.0.28__py3-none-any.whl → 10.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
- megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
- megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
- megadetector/classification/aggregate_classifier_probs.py +3 -3
- megadetector/classification/analyze_failed_images.py +5 -5
- megadetector/classification/cache_batchapi_outputs.py +5 -5
- megadetector/classification/create_classification_dataset.py +11 -12
- megadetector/classification/crop_detections.py +10 -10
- megadetector/classification/csv_to_json.py +8 -8
- megadetector/classification/detect_and_crop.py +13 -15
- megadetector/classification/efficientnet/model.py +8 -8
- megadetector/classification/efficientnet/utils.py +6 -5
- megadetector/classification/evaluate_model.py +7 -7
- megadetector/classification/identify_mislabeled_candidates.py +6 -6
- megadetector/classification/json_to_azcopy_list.py +1 -1
- megadetector/classification/json_validator.py +29 -32
- megadetector/classification/map_classification_categories.py +9 -9
- megadetector/classification/merge_classification_detection_output.py +12 -9
- megadetector/classification/prepare_classification_script.py +19 -19
- megadetector/classification/prepare_classification_script_mc.py +26 -26
- megadetector/classification/run_classifier.py +4 -4
- megadetector/classification/save_mislabeled.py +6 -6
- megadetector/classification/train_classifier.py +1 -1
- megadetector/classification/train_classifier_tf.py +9 -9
- megadetector/classification/train_utils.py +10 -10
- megadetector/data_management/annotations/annotation_constants.py +1 -2
- megadetector/data_management/camtrap_dp_to_coco.py +79 -46
- megadetector/data_management/cct_json_utils.py +103 -103
- megadetector/data_management/cct_to_md.py +49 -49
- megadetector/data_management/cct_to_wi.py +33 -33
- megadetector/data_management/coco_to_labelme.py +75 -75
- megadetector/data_management/coco_to_yolo.py +210 -193
- megadetector/data_management/databases/add_width_and_height_to_db.py +86 -12
- megadetector/data_management/databases/combine_coco_camera_traps_files.py +40 -40
- megadetector/data_management/databases/integrity_check_json_db.py +228 -200
- megadetector/data_management/databases/subset_json_db.py +33 -33
- megadetector/data_management/generate_crops_from_cct.py +88 -39
- megadetector/data_management/get_image_sizes.py +54 -49
- megadetector/data_management/labelme_to_coco.py +133 -125
- megadetector/data_management/labelme_to_yolo.py +159 -73
- megadetector/data_management/lila/create_lila_blank_set.py +81 -83
- megadetector/data_management/lila/create_lila_test_set.py +32 -31
- megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
- megadetector/data_management/lila/download_lila_subset.py +21 -24
- megadetector/data_management/lila/generate_lila_per_image_labels.py +365 -107
- megadetector/data_management/lila/get_lila_annotation_counts.py +35 -33
- megadetector/data_management/lila/get_lila_image_counts.py +22 -22
- megadetector/data_management/lila/lila_common.py +73 -70
- megadetector/data_management/lila/test_lila_metadata_urls.py +28 -19
- megadetector/data_management/mewc_to_md.py +344 -340
- megadetector/data_management/ocr_tools.py +262 -255
- megadetector/data_management/read_exif.py +249 -227
- megadetector/data_management/remap_coco_categories.py +90 -28
- megadetector/data_management/remove_exif.py +81 -21
- megadetector/data_management/rename_images.py +187 -187
- megadetector/data_management/resize_coco_dataset.py +588 -120
- megadetector/data_management/speciesnet_to_md.py +41 -41
- megadetector/data_management/wi_download_csv_to_coco.py +55 -55
- megadetector/data_management/yolo_output_to_md_output.py +248 -122
- megadetector/data_management/yolo_to_coco.py +333 -191
- megadetector/detection/change_detection.py +832 -0
- megadetector/detection/process_video.py +340 -337
- megadetector/detection/pytorch_detector.py +358 -278
- megadetector/detection/run_detector.py +399 -186
- megadetector/detection/run_detector_batch.py +404 -377
- megadetector/detection/run_inference_with_yolov5_val.py +340 -327
- megadetector/detection/run_tiled_inference.py +257 -249
- megadetector/detection/tf_detector.py +24 -24
- megadetector/detection/video_utils.py +332 -295
- megadetector/postprocessing/add_max_conf.py +19 -11
- megadetector/postprocessing/categorize_detections_by_size.py +45 -45
- megadetector/postprocessing/classification_postprocessing.py +468 -433
- megadetector/postprocessing/combine_batch_outputs.py +23 -23
- megadetector/postprocessing/compare_batch_results.py +590 -525
- megadetector/postprocessing/convert_output_format.py +106 -102
- megadetector/postprocessing/create_crop_folder.py +347 -147
- megadetector/postprocessing/detector_calibration.py +173 -168
- megadetector/postprocessing/generate_csv_report.py +508 -499
- megadetector/postprocessing/load_api_results.py +48 -27
- megadetector/postprocessing/md_to_coco.py +133 -102
- megadetector/postprocessing/md_to_labelme.py +107 -90
- megadetector/postprocessing/md_to_wi.py +40 -40
- megadetector/postprocessing/merge_detections.py +92 -114
- megadetector/postprocessing/postprocess_batch_results.py +319 -301
- megadetector/postprocessing/remap_detection_categories.py +91 -38
- megadetector/postprocessing/render_detection_confusion_matrix.py +214 -205
- megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
- megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +704 -679
- megadetector/postprocessing/separate_detections_into_folders.py +226 -211
- megadetector/postprocessing/subset_json_detector_output.py +265 -262
- megadetector/postprocessing/top_folders_to_bottom.py +45 -45
- megadetector/postprocessing/validate_batch_results.py +70 -70
- megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +18 -19
- megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +54 -33
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +67 -67
- megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
- megadetector/taxonomy_mapping/simple_image_download.py +8 -8
- megadetector/taxonomy_mapping/species_lookup.py +156 -74
- megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
- megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
- megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
- megadetector/utils/ct_utils.py +1049 -211
- megadetector/utils/directory_listing.py +21 -77
- megadetector/utils/gpu_test.py +22 -22
- megadetector/utils/md_tests.py +632 -529
- megadetector/utils/path_utils.py +1520 -431
- megadetector/utils/process_utils.py +41 -41
- megadetector/utils/split_locations_into_train_val.py +62 -62
- megadetector/utils/string_utils.py +148 -27
- megadetector/utils/url_utils.py +489 -176
- megadetector/utils/wi_utils.py +2658 -2526
- megadetector/utils/write_html_image_list.py +137 -137
- megadetector/visualization/plot_utils.py +34 -30
- megadetector/visualization/render_images_with_thumbnails.py +39 -74
- megadetector/visualization/visualization_utils.py +487 -435
- megadetector/visualization/visualize_db.py +232 -198
- megadetector/visualization/visualize_detector_output.py +82 -76
- {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/METADATA +5 -2
- megadetector-10.0.0.dist-info/RECORD +139 -0
- {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/WHEEL +1 -1
- megadetector/api/batch_processing/api_core/__init__.py +0 -0
- megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
- megadetector/api/batch_processing/api_core/batch_service/score.py +0 -439
- megadetector/api/batch_processing/api_core/server.py +0 -294
- megadetector/api/batch_processing/api_core/server_api_config.py +0 -97
- megadetector/api/batch_processing/api_core/server_app_config.py +0 -55
- megadetector/api/batch_processing/api_core/server_batch_job_manager.py +0 -220
- megadetector/api/batch_processing/api_core/server_job_status_table.py +0 -149
- megadetector/api/batch_processing/api_core/server_orchestration.py +0 -360
- megadetector/api/batch_processing/api_core/server_utils.py +0 -88
- megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
- megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
- megadetector/api/batch_processing/api_support/__init__.py +0 -0
- megadetector/api/batch_processing/api_support/summarize_daily_activity.py +0 -152
- megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
- megadetector/api/synchronous/__init__.py +0 -0
- megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
- megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +0 -151
- megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -263
- megadetector/api/synchronous/api_core/animal_detection_api/config.py +0 -35
- megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
- megadetector/api/synchronous/api_core/tests/load_test.py +0 -110
- megadetector/data_management/importers/add_nacti_sizes.py +0 -52
- megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
- megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
- megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
- megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
- megadetector/data_management/importers/awc_to_json.py +0 -191
- megadetector/data_management/importers/bellevue_to_json.py +0 -272
- megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
- megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
- megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
- megadetector/data_management/importers/cct_field_adjustments.py +0 -58
- megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
- megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
- megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
- megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
- megadetector/data_management/importers/ena24_to_json.py +0 -276
- megadetector/data_management/importers/filenames_to_json.py +0 -386
- megadetector/data_management/importers/helena_to_cct.py +0 -283
- megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
- megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
- megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
- megadetector/data_management/importers/jb_csv_to_json.py +0 -150
- megadetector/data_management/importers/mcgill_to_json.py +0 -250
- megadetector/data_management/importers/missouri_to_json.py +0 -490
- megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
- megadetector/data_management/importers/noaa_seals_2019.py +0 -181
- megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
- megadetector/data_management/importers/pc_to_json.py +0 -365
- megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
- megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
- megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
- megadetector/data_management/importers/rspb_to_json.py +0 -356
- megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
- megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
- megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
- megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
- megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
- megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
- megadetector/data_management/importers/sulross_get_exif.py +0 -65
- megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
- megadetector/data_management/importers/ubc_to_json.py +0 -399
- megadetector/data_management/importers/umn_to_json.py +0 -507
- megadetector/data_management/importers/wellington_to_json.py +0 -263
- megadetector/data_management/importers/wi_to_json.py +0 -442
- megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
- megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
- megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
- megadetector/utils/azure_utils.py +0 -178
- megadetector/utils/sas_blob_utils.py +0 -509
- megadetector-5.0.28.dist-info/RECORD +0 -209
- /megadetector/{api/batch_processing/__init__.py → __init__.py} +0 -0
- {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/licenses/LICENSE +0 -0
- {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/top_level.txt +0 -0
@@ -3,12 +3,12 @@
 classification_postprocessing.py

 Functions for postprocessing species classification results, particularly:

 * Smoothing results within an image (an image with 700 cows and one deer is really just 701
   cows)
 * Smoothing results within a sequence (a sequence that looks like deer/deer/deer/elk/deer/deer
   is really just a deer)

 """

 #%% Constants and imports
@@ -32,7 +32,7 @@ from megadetector.utils.wi_utils import taxonomy_level_string_to_index
 from megadetector.utils.wi_utils import non_taxonomic_prediction_strings
 from megadetector.utils.wi_utils import human_prediction_string
 from megadetector.utils.wi_utils import animal_prediction_string
-from megadetector.utils.wi_utils import blank_prediction_string
+from megadetector.utils.wi_utils import blank_prediction_string # noqa


 #%% Options classes
@@ -44,86 +44,94 @@ class ClassificationSmoothingOptions:
     """

     def __init__(self):

         #: How many detections do we need in a dominant category to overwrite
         #: non-dominant classifications? This is irrelevant if
         #: max_detections_nondominant_class <= 1.
         self.min_detections_to_overwrite_secondary = 4

         #: Even if we have a dominant class, if a non-dominant class has at least
         #: this many classifications in an image, leave them alone.
         #:
         #: If this is <= 1, we won't replace non-dominant, non-other classes
         #: with the dominant class, even if there are 900 cows and 1 deer.
         self.max_detections_nondominant_class = 1

         #: How many detections do we need in a dominant category to overwrite
         #: non-dominant classifications in the same family? If this is <= 0,
         #: we'll skip this step. This option doesn't mean anything if
         #: max_detections_nondominant_class_same_family <= 1.
         self.min_detections_to_overwrite_secondary_same_family = 2

         #: If we have this many classifications of a nondominant category,
         #: we won't do same-family overwrites. <= 1 means "even if there are
         #: a million deer, if there are two million moose, call all the deer
         #: moose". This option doesn't mean anything if
         #: min_detections_to_overwrite_secondary_same_family <= 0.
         self.max_detections_nondominant_class_same_family = -1

         #: If the dominant class has at least this many classifications, overwrite
         #: "other" classifications with the dominant class
         self.min_detections_to_overwrite_other = 2

         #: Names to treat as "other" categories; can't be None, but can be empty
         #:
         #: "Other" classifications will be changed to the dominant category, regardless
         #: of confidence, as long as there are at least min_detections_to_overwrite_other
         #: examples of the dominant class. For example, cow/other will remain unchanged,
         #: but cow/cow/other will become cow/cow/cow.
         self.other_category_names = ['other','unknown','no cv result','animal','blank','mammal']

         #: We're not even going to mess around with classifications below this threshold.
         #:
         #: We won't count them, we won't over-write them, they don't exist during the
         #: within-image smoothing step.
         self.classification_confidence_threshold = 0.5

         #: We're not even going to mess around with detections below this threshold.
         #:
         #: We won't count them, we won't over-write them, they don't exist during the
         #: within-image smoothing step.
         self.detection_confidence_threshold = 0.15

         #: If classification descriptions are present and appear to represent taxonomic
         #: information, should we propagate classifications when lower-level taxa are more
         #: common in an image? For example, if we see "carnivore/fox/fox/deer", should
         #: we make that "fox/fox/fox/deer"?
         self.propagate_classifications_through_taxonomy = True

         #: When propagating classifications down through taxonomy levels, we have to
         #: decide whether we prefer more frequent categories or more specific categories.
         #: taxonomy_propagation_level_weight and taxonomy_propagation_count_weight
         #: balance levels against counts in this process.
         self.taxonomy_propagation_level_weight = 1.0

         #: When propagating classifications down through taxonomy levels, we have to
         #: decide whether we prefer more frequent categories or more specific categories.
         #: taxonomy_propagation_level_weight and taxonomy_propagation_count_weight
         #: balance levels against counts in this process.
         #:
         #: With a very low default value, this just breaks ties.
         self.taxonomy_propagation_count_weight = 0.01

         #: Should we record information about the state of labels prior to smoothing?
         self.add_pre_smoothing_description = True

         #: When a dict (rather than a file) is passed to either smoothing function,
         #: if this is True, we'll make a copy of the input dict before modifying.
         self.modify_in_place = False

+        #: Only include these categories in the smoothing process (None to use all categories)
+        self.detection_category_names_to_smooth = ['animal']
+
         #: Debug options
         self.break_at_image = None

+        ## Populated internally
+
+        #: Only include these categories in the smoothing process (None to use all categories)
+        self._detection_category_ids_to_smooth = None
+

 #%% Utility functions

@@ -131,60 +139,79 @@ def _results_for_sequence(images_this_sequence,filename_to_results):
     """
     Fetch MD results for every image in this sequence, based on the 'file_name' field
     """

     results_this_sequence = []
     for im in images_this_sequence:
         fn = im['file_name']
         results_this_image = filename_to_results[fn]
         assert isinstance(results_this_image,dict)
         results_this_sequence.append(results_this_image)

     return results_this_sequence


 def _sort_images_by_time(images):
     """
     Returns a copy of [images], sorted by the 'datetime' field (ascending).
     """
     return sorted(images, key = lambda im: im['datetime'])
+
+
+def _detection_is_relevant_for_smoothing(det,options):
+    """
+    Determine whether [det] has classifications that might be meaningful for smoothing.
+    """
+
+    if ('classifications' not in det) or \
+        (det['conf'] < options.detection_confidence_threshold):
+        return False
+
+    # Ignore non-smoothed categories
+    if (options._detection_category_ids_to_smooth is not None) and \
+        (det['category'] not in options._detection_category_ids_to_smooth):
+        return False
+
+    return True


 def count_detections_by_classification_category(detections,options=None):
     """
     Count the number of instances of each classification category in the detections list
     [detections] that have an above-threshold detection. Sort results in descending
     order by count. Returns a dict mapping category ID --> count. If no detections
     are above threshold, returns an empty dict.

     Only processes the top classification for each detection.

     Args:
-        detections: detections list
+        detections (list of dict): detections list
         options (ClassificationSmoothingOptions, optional): see ClassificationSmoothingOptions

     Returns:
         dict mapping above-threshold category IDs to counts
     """

     if detections is None or len(detections) == 0:
         return {}

     if options is None:
         options = ClassificationSmoothingOptions()

     category_to_count = defaultdict(int)

     for det in detections:
-        if ('classifications' not in det) or \
-            (det['conf'] < options.detection_confidence_threshold):
-            continue
-        c = det['classifications'][0]
-        if c[1] >= options.classification_confidence_threshold:
-            category_to_count[c[0]] += 1
+
+        if not _detection_is_relevant_for_smoothing(det,options):
+            continue
+
+        c = det['classifications'][0]
+        if c[1] >= options.classification_confidence_threshold:
+            category_to_count[c[0]] += 1
+
     category_to_count = {k: v for k, v in sorted(category_to_count.items(),
                                                  key=lambda item: item[1],
                                                  reverse=True)}

     return category_to_count

@@ -199,7 +226,7 @@ def get_classification_description_string(category_to_count,classification_descr
     Returns:
         string: a description of this image's content, e.g. "rabbit (4), human (1)"
     """

     category_strings = []
     # category_id = next(iter(category_to_count))
     for category_id in category_to_count:
@@ -212,29 +239,31 @@ get_classification_description_string(category_to_count,classification_descr
         count = category_to_count[category_id]
         category_string = '{} ({})'.format(category_name,count)
         category_strings.append(category_string)

     return ', '.join(category_strings)


 def _print_counts_with_names(category_to_count,classification_descriptions):
     """
     Print a list of classification categories with counts, based on the name --> count
     dict [category_to_count]
     """

     for category_id in category_to_count:
         category_name = classification_descriptions[category_id]
         count = category_to_count[category_id]
         print('{}: {} ({})'.format(category_id,category_name,count))


 def _prepare_results_for_smoothing(input_file,options):
     """
     Load results from [input_file] if necessary, prepare category descriptions
     for smoothing. Adds pre-smoothing descriptions to every image if the options
     say we're supposed to do that.
+
+    May modify some fields in [options].
     """

     if isinstance(input_file,str):
         with open(input_file,'r') as f:
             print('Loading results from:\n{}'.format(input_file))
@@ -249,71 +278,82 @@ def _prepare_results_for_smoothing(input_file,options):


     ## Category processing

     category_name_to_id = {d['classification_categories'][k]:k for k in d['classification_categories']}
     other_category_ids = []
     for s in options.other_category_names:
         if s in category_name_to_id:
             other_category_ids.append(category_name_to_id[s])

+    # Possibly update the list of category IDs we should smooth
+    if options.detection_category_names_to_smooth is None:
+        options._detection_category_ids_to_smooth = None
+    else:
+        detection_category_id_to_name = d['detection_categories']
+        detection_category_name_to_id = invert_dictionary(detection_category_id_to_name)
+        options._detection_category_ids_to_smooth = []
+        for category_name in options.detection_category_names_to_smooth:
+            options._detection_category_ids_to_smooth.append(detection_category_name_to_id[category_name])
+
     # Before we do anything else, get rid of everything but the top classification
     # for each detection, and remove the 'classifications' field from detections with
     # no classifications.
     for im in tqdm(d['images']):

         if 'detections' not in im or im['detections'] is None or len(im['detections']) == 0:
             continue

         detections = im['detections']

         for det in detections:

             if 'classifications' not in det:
                 continue
             if len(det['classifications']) == 0:
                 del det['classifications']
                 continue

             classification_confidence_values = [c[1] for c in det['classifications']]
             assert is_list_sorted(classification_confidence_values,reverse=True)
             det['classifications'] = [det['classifications'][0]]

         # ...for each detection in this image

     # ...for each image


-    ## Clean up classification descriptions
+    ## Clean up classification descriptions...

+    # ...so we can test taxonomic relationships by substring testing.
+
     classification_descriptions_clean = None
     classification_descriptions = None

     if 'classification_category_descriptions' in d:
         classification_descriptions = d['classification_category_descriptions']
         classification_descriptions_clean = {}
         # category_id = next(iter(classification_descriptions))
         for category_id in classification_descriptions:
             classification_descriptions_clean[category_id] = \
                 clean_taxonomy_string(classification_descriptions[category_id]).strip(';').lower()


     ## Optionally add pre-smoothing descriptions to every image

-    if options.add_pre_smoothing_description:
+    if options.add_pre_smoothing_description and (classification_descriptions is not None):

         for im in tqdm(d['images']):

             if 'detections' not in im or im['detections'] is None or len(im['detections']) == 0:
                 continue

             detections = im['detections']
             category_to_count = count_detections_by_classification_category(detections, options)

             im['pre_smoothing_description'] = \
                 get_classification_description_string(category_to_count, classification_descriptions)


     return {
         'd':d,
         'other_category_ids':other_category_ids,
@@ -321,7 +361,7 @@ def _prepare_results_for_smoothing(input_file,options):
         'classification_descriptions':classification_descriptions
     }

 # ...def _prepare_results_for_smoothing(...)


 def _smooth_classifications_for_list_of_detections(detections,
@@ -332,142 +372,140 @@ def _smooth_classifications_for_list_of_detections(detections,
     """
     Smooth classifications for a list of detections, which may have come from a single
     image, or may represent an entire sequence.

     Returns None if no changes are made, else a dict.

     classification_descriptions_clean should be semicolon-delimited taxonomic strings
     from which common names and GUIDs have already been removed.

     Assumes there is only one classification per detection, i.e. that non-top classifications
     have already been removed.
     """

     ## Count the number of instances of each category in this image

     category_to_count = count_detections_by_classification_category(detections, options)
     # _print_counts_with_names(category_to_count,classification_descriptions)
     # get_classification_description_string(category_to_count, classification_descriptions)

     if len(category_to_count) <= 1:
         return None

     keys = list(category_to_count.keys())

     # Handle a quirky special case: if the most common category is "other" and
     # it's "tied" with the second-most-common category, swap them
     if (len(keys) > 1) and \
         (keys[0] in other_category_ids) and \
         (keys[1] not in other_category_ids) and \
         (category_to_count[keys[0]] == category_to_count[keys[1]]):
         keys[1], keys[0] = keys[0], keys[1]

     max_count = category_to_count[keys[0]]
     most_common_category = keys[0]
     del keys


     ## Debug tools

     verbose_debug_enabled = False

     if options.break_at_image is not None:
         for det in detections:
             if 'image_filename' in det and \
                 det['image_filename'] == options.break_at_image:
                 verbose_debug_enabled = True
                 break

     if verbose_debug_enabled:
         _print_counts_with_names(category_to_count,classification_descriptions)
         from IPython import embed; embed()


     ## Possibly change "other" classifications to the most common category

     # ...if the dominant category is not an "other" category.

     n_other_classifications_changed_this_image = 0

     # If we have at least *min_detections_to_overwrite_other* in a category that isn't
     # "other", change all "other" classifications to that category
     if (max_count >= options.min_detections_to_overwrite_other) and \
         (most_common_category not in other_category_ids):

         for det in detections:

-            if ('classifications' not in det) or \
-                (det['conf'] < options.detection_confidence_threshold):
+            if not _detection_is_relevant_for_smoothing(det,options):
                 continue

             assert len(det['classifications']) == 1
             c = det['classifications'][0]

             if (c[1] >= options.classification_confidence_threshold) and \
                 (c[0] in other_category_ids):

                 if verbose_debug_enabled:
                     print('Replacing {} with {}'.format(
                         classification_descriptions[c[0]],
                         classification_descriptions[c[1]]))

                 n_other_classifications_changed_this_image += 1
                 c[0] = most_common_category

             # ...if there are classifications for this detection

         # ...for each detection

     # ...if we should overwrite all "other" classifications

     if verbose_debug_enabled:
         print('Made {} other changes'.format(n_other_classifications_changed_this_image))


     ## Re-count

     category_to_count = count_detections_by_classification_category(detections, options)
     # _print_counts_with_names(category_to_count,classification_descriptions)
     keys = list(category_to_count.keys())
     max_count = category_to_count[keys[0]]
     most_common_category = keys[0]
     del keys


     ## Possibly change some non-dominant classifications to the dominant category

     process_taxonomic_rules = \
         (classification_descriptions_clean is not None) and \
         (len(classification_descriptions_clean) > 0) and \
         (len(category_to_count) > 1)

     n_detections_flipped_this_image = 0

     # Don't do this if the most common category is an "other" category, or
     # if we don't have enough of the most common category
     if (most_common_category not in other_category_ids) and \
         (max_count >= options.min_detections_to_overwrite_secondary):

         # i_det = 0; det = detections[i_det]
         for i_det,det in enumerate(detections):

-            if ('classifications' not in det) or \
-                (det['conf'] < options.detection_confidence_threshold):
+            if not _detection_is_relevant_for_smoothing(det,options):
                 continue

             assert len(det['classifications']) == 1
             c = det['classifications'][0]

             # Don't over-write the most common category with itself
             if c[0] == most_common_category:
                 continue

             # Don't bother with below-threshold classifications
             if c[1] < options.classification_confidence_threshold:
                 continue

             # If we're doing taxonomic processing, at this stage, don't turn children
             # into parents; we'll likely turn parents into children in the next stage.

             if process_taxonomic_rules:

                 most_common_category_description = \
@@ -475,180 +513,179 @@ def _smooth_classifications_for_list_of_detections(detections,

                 category_id_this_classification = c[0]
                 assert category_id_this_classification in category_to_count

                 category_description_this_classification = \
                     classification_descriptions_clean[category_id_this_classification]

                 # An empty description corresponds to the "animal" category. We don't handle
                 # "animal" here as a parent category, that would be handled in the "other smoothing"
                 # step above.
                 if len(category_description_this_classification) == 0:
                     continue

                 most_common_category_is_parent_of_this_category = \
                     most_common_category_description in category_description_this_classification

                 if most_common_category_is_parent_of_this_category:
                     continue

             # If we have fewer of this category than the most common category,
             # but not *too* many, flip it to the most common category.
             if (max_count > category_to_count[c[0]]) and \
                 (category_to_count[c[0]] <= options.max_detections_nondominant_class):

                 c[0] = most_common_category
                 n_detections_flipped_this_image += 1

         # ...for each detection

     # ...if the dominant category is legit

     if verbose_debug_enabled:
         print('Made {} non-dominant --> dominant changes'.format(
             n_detections_flipped_this_image))


     ## Re-count

     category_to_count = count_detections_by_classification_category(detections, options)
     # _print_counts_with_names(category_to_count,classification_descriptions)
     keys = list(category_to_count.keys())
     max_count = category_to_count[keys[0]]
     most_common_category = keys[0]
     del keys


     ## Possibly collapse higher-level taxonomic predictions down to lower levels

     n_taxonomic_changes_this_image = 0

     process_taxonomic_rules = \
         (classification_descriptions_clean is not None) and \
         (len(classification_descriptions_clean) > 0) and \
         (len(category_to_count) > 1)

     if process_taxonomic_rules and options.propagate_classifications_through_taxonomy:

         # det = detections[3]
         for det in detections:

-            if ('classifications' not in det) or \
-                (det['conf'] < options.detection_confidence_threshold):
+            if not _detection_is_relevant_for_smoothing(det,options):
                 continue

             assert len(det['classifications']) == 1
             c = det['classifications'][0]

             # Don't bother with any classifications below the confidence threshold
             if c[1] < options.classification_confidence_threshold:
                 continue

             category_id_this_classification = c[0]
             assert category_id_this_classification in category_to_count

             category_description_this_classification = \
                 classification_descriptions_clean[category_id_this_classification]

             # An empty description corresponds to the "animal" category. We don't handle
             # "animal" here as a parent category, that would be handled in the "other smoothing"
             # step above.
             if len(category_description_this_classification) == 0:
                 continue

             # We may have multiple child categories to choose from; this keeps track of
             # the "best" we've seen so far. "Best" is based on the level (species is better
             # than genus) and number.
             child_category_to_score = defaultdict(float)

             for category_id_of_candidate_child in category_to_count.keys():

                 # A category is never its own child
                 if category_id_of_candidate_child == category_id_this_classification:
                     continue

                 # Is this candidate a child of the current classification?
                 category_description_candidate_child = \
                     classification_descriptions_clean[category_id_of_candidate_child]

                 # An empty description corresponds to "animal", which can never
                 # be a child of another category.
                 if len(category_description_candidate_child) == 0:
                     continue

                 # As long as we're using "clean" descriptions, parent/child taxonomic
                 # relationships are defined by a substring relationship
                 is_child = category_description_this_classification in \
                     category_description_candidate_child
                 if not is_child:
                     continue

                 # How many instances of this child category are there?
                 child_category_count = category_to_count[category_id_of_candidate_child]

                 # What taxonomy level is this child category defined at?
                 child_category_level = taxonomy_level_index(
                     classification_descriptions[category_id_of_candidate_child])

                 child_category_to_score[category_id_of_candidate_child] = \
                     child_category_level * options.taxonomy_propagation_level_weight + \
                     child_category_count * options.taxonomy_propagation_count_weight

             # ...for each category we are considering reducing this classification to

             # Did we find a category we want to change this classification to?
             if len(child_category_to_score) > 0:

                 # Find the child category with the highest score
                 child_category_to_score = sort_dictionary_by_value(
                     child_category_to_score,reverse=True)
                 best_child_category = next(iter(child_category_to_score.keys()))

                 if verbose_debug_enabled:
                     old_category_name = \
                         classification_descriptions_clean[c[0]]
                     new_category_name = \
                         classification_descriptions_clean[best_child_category]
                     print('Replacing {} with {}'.format(
                         old_category_name,new_category_name))

                 c[0] = best_child_category
                 n_taxonomic_changes_this_image += 1

         # ...for each detection

     # ...if we have taxonomic information available


     ## Re-count

     category_to_count = count_detections_by_classification_category(detections, options)
     # _print_counts_with_names(category_to_count,classification_descriptions)
     keys = list(category_to_count.keys())
     max_count = category_to_count[keys[0]]
     most_common_category = keys[0]
     del keys


     ## Possibly do within-family smoothing

     n_within_family_smoothing_changes = 0

     # min_detections_to_overwrite_secondary_same_family = -1
     # max_detections_nondominant_class_same_family = 1
     family_level = taxonomy_level_string_to_index('family')

     if process_taxonomic_rules:

         category_description_most_common_category = \
             classification_descriptions[most_common_category]
         most_common_category_taxonomic_level = \
             taxonomy_level_index(category_description_most_common_category)
         n_most_common_category = category_to_count[most_common_category]
         tokens = category_description_most_common_category.split(';')
         assert len(tokens) == 7
         most_common_category_family = tokens[3]
         most_common_category_genus = tokens[4]

     # Only consider remapping to genus or species level, and only when we have
     # a high enough count in the most common category
     if process_taxonomic_rules and \
@@ -656,36 +693,35 @@ def _smooth_classifications_for_list_of_detections(detections,
         (most_common_category not in other_category_ids) and \
         (most_common_category_taxonomic_level > family_level) and \
         (n_most_common_category >= options.min_detections_to_overwrite_secondary_same_family):

         # det = detections[0]
         for det in detections:

-            if ('classifications' not in det) or \
-                (det['conf'] < options.detection_confidence_threshold):
+            if not _detection_is_relevant_for_smoothing(det,options):
                 continue

             assert len(det['classifications']) == 1
             c = det['classifications'][0]

             # Don't over-write the most common category with itself
             if c[0] == most_common_category:
                 continue

             # Don't bother with below-threshold classifications
             if c[1] < options.classification_confidence_threshold:
                 continue

             n_candidate_flip_category = category_to_count[c[0]]

             # Do we have too many of the non-dominant category to do this kind of swap?
             if n_candidate_flip_category > \
                 options.max_detections_nondominant_class_same_family:
                 continue

             # Don't flip classes when it's a tie
             if n_candidate_flip_category == n_most_common_category:
                 continue

             category_description_candidate_flip = \
                 classification_descriptions[c[0]]
             tokens = category_description_candidate_flip.split(';')
@@ -693,34 +729,33 @@ def _smooth_classifications_for_list_of_detections(detections,
             candidate_flip_category_family = tokens[3]
             candidate_flip_category_genus = tokens[4]
             candidate_flip_category_taxonomic_level = \
                 taxonomy_level_index(category_description_candidate_flip)

             # Only proceed if we have valid family strings
             if (len(candidate_flip_category_family) == 0) or \
                 (len(most_common_category_family) == 0):
                 continue

             # Only proceed if the candidate and the most common category are in the same family
             if candidate_flip_category_family != most_common_category_family:
                 continue

             # Don't flip from a species to the genus level in the same genus
             if (candidate_flip_category_genus == most_common_category_genus) and \
                 (candidate_flip_category_taxonomic_level > \
                  most_common_category_taxonomic_level):
                 continue

             old_category_name = classification_descriptions_clean[c[0]]
             new_category_name = classification_descriptions_clean[most_common_category]

             c[0] = most_common_category
             n_within_family_smoothing_changes += 1

         # ...for each detection

     # ...if the dominant category is legit and we have taxonomic information available

     return {'n_other_classifications_changed_this_image':n_other_classifications_changed_this_image,
             'n_detections_flipped_this_image':n_detections_flipped_this_image,
             'n_taxonomic_changes_this_image':n_taxonomic_changes_this_image,
@@ -737,33 +772,33 @@ def _smooth_single_image(im,
     """
     Smooth classifications for a single image. Returns None if no changes are made,
     else a dict.

     classification_descriptions_clean should be semicolon-delimited taxonomic strings
     from which common names and GUIDs have already been removed.

     Assumes there is only one classification per detection, i.e. that non-top classifications
     have already been removed.
     """

     if 'detections' not in im or im['detections'] is None or len(im['detections']) == 0:
         return

     detections = im['detections']

     # Simplify debugging
     for det in detections:
         det['image_filename'] = im['file']

     to_return = _smooth_classifications_for_list_of_detections(detections,
                     options=options,
                     other_category_ids=other_category_ids,
                     classification_descriptions=classification_descriptions,
                     classification_descriptions_clean=classification_descriptions_clean)

     # Clean out debug information
     for det in detections:
         del det['image_filename']

     return to_return

 # ...def smooth_single_image
@@ -775,104 +810,104 @@ def smooth_classification_results_image_level(input_file,output_file=None,option
     """
     Smooth classifications at the image level for all results in the MD-formatted results
     file [input_file], optionally writing a new set of results to [output_file].
-
-    This function generally expresses the notion that an image with 700 cows and one deer
+
+    This function generally expresses the notion that an image with 700 cows and one deer
     is really just 701 cows.
-
+
     Only count detections with a classification confidence above
     [options.classification_confidence_threshold], which in practice means we're only
     looking at one category per detection.
-
+
     If an image has at least [options.min_detections_to_overwrite_secondary] such detections
     in the most common category, and no more than [options.max_detections_nondominant_class]
     in the second-most-common category, flip all detections to the most common
     category.
-
-    Optionally treat some classes as particularly unreliable, typically used to overwrite an
+
+    Optionally treat some classes as particularly unreliable, typically used to overwrite an
     "other" class.
-
+
     This function also removes everything but the dominant classification for each detection.
-
+
     Args:
         input_file (str): MegaDetector-formatted classification results file to smooth. Can
             also be an already-loaded results dict.
         output_file (str, optional): .json file to write smoothed results
-        options (ClassificationSmoothingOptions, optional): see
+        options (ClassificationSmoothingOptions, optional): see
            ClassificationSmoothingOptions for details.
-
+
     Returns:
         dict: MegaDetector-results-formatted dict, identical to what's written to
             [output_file] if [output_file] is not None.
     """
-
+
     ## Input validation
-
+
     if options is None:
         options = ClassificationSmoothingOptions()
-
+
     r = _prepare_results_for_smoothing(input_file, options)
     d = r['d']
     other_category_ids = r['other_category_ids']
     classification_descriptions_clean = r['classification_descriptions_clean']
     classification_descriptions = r['classification_descriptions']
-
-
+
+
     ## Smoothing
-
+
     n_other_classifications_changed = 0
     n_other_images_changed = 0
     n_taxonomic_images_changed = 0
-
+
     n_detections_flipped = 0
     n_images_changed = 0
-    n_taxonomic_classification_changes = 0
-
-    # im = d['images'][0]
+    n_taxonomic_classification_changes = 0
+
+    # im = d['images'][0]
     for im in tqdm(d['images']):
-
+
         r = _smooth_single_image(im,
                                  options,
                                  other_category_ids,
                                  classification_descriptions=classification_descriptions,
                                  classification_descriptions_clean=classification_descriptions_clean)
-
+
         if r is None:
             continue
-
+
         n_detections_flipped_this_image = r['n_detections_flipped_this_image']
         n_other_classifications_changed_this_image = \
             r['n_other_classifications_changed_this_image']
         n_taxonomic_changes_this_image = r['n_taxonomic_changes_this_image']
-
+
         n_detections_flipped += n_detections_flipped_this_image
         n_other_classifications_changed += n_other_classifications_changed_this_image
         n_taxonomic_classification_changes += n_taxonomic_changes_this_image
-
+
         if n_detections_flipped_this_image > 0:
             n_images_changed += 1
         if n_other_classifications_changed_this_image > 0:
             n_other_images_changed += 1
         if n_taxonomic_changes_this_image > 0:
             n_taxonomic_images_changed += 1
-
-    # ...for each image
-
+
+    # ...for each image
+
     print('Classification smoothing: changed {} detections on {} images'.format(
         n_detections_flipped,n_images_changed))
-
+
     print('"Other" smoothing: changed {} detections on {} images'.format(
         n_other_classifications_changed,n_other_images_changed))
-
+
     print('Taxonomic smoothing: changed {} detections on {} images'.format(
         n_taxonomic_classification_changes,n_taxonomic_images_changed))
-
-
+
+
     ## Write output
-
-    if output_file is not None:
+
+    if output_file is not None:
         print('Writing results after image-level smoothing to:\n{}'.format(output_file))
         with open(output_file,'w') as f:
-            json.dump(d,f,indent=1)
+            json.dump(d,f,indent=1)

     return d

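For orientation, here is a minimal usage sketch of the image-level entry point shown in this hunk. The option names come from the docstring above; the filenames and threshold values are hypothetical, and the import path assumes the package's postprocessing module layout.

```python
from megadetector.postprocessing.classification_postprocessing import (
    ClassificationSmoothingOptions,
    smooth_classification_results_image_level)

options = ClassificationSmoothingOptions()

# Flip runner-up classifications only when the dominant category has at least
# this many above-threshold detections (illustrative value)...
options.min_detections_to_overwrite_secondary = 4

# ...and the runner-up category has no more than this many (illustrative value)
options.max_detections_nondominant_class = 1

# Hypothetical filenames
smoothed_results = smooth_classification_results_image_level(
    input_file='md-results.json',
    output_file='md-results-image-smoothed.json',
    options=options)
```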
@@ -880,7 +915,7 @@ def smooth_classification_results_image_level(input_file,output_file=None,option


 #%% Sequence-level smoothing
-
+
 def smooth_classification_results_sequence_level(input_file,
                                                  cct_sequence_information,
                                                  output_file=None,
@@ -888,39 +923,39 @@ def smooth_classification_results_sequence_level(input_file,
     """
     Smooth classifications at the sequence level for all results in the MD-formatted results
     file [input_file], optionally writing a new set of results to [output_file].
-
+
     This function generally expresses the notion that a sequence that looks like
     deer/deer/deer/elk/deer/deer/deer/deer is really just a deer.
-
+
     Args:
         input_file (str or dict): MegaDetector-formatted classification results file to smooth
-            (or already-loaded results). If you supply a dict, it's
-
+            (or already-loaded results). If you supply a dict, it's copied by default, but
+            in-place modification is supported via options.modify_in_place.
         cct_sequence_information (str, dict, or list): COCO Camera Traps file containing sequence IDs for
             each image (or an already-loaded CCT-formatted dict, or just the 'images' list from a CCT dict).
         output_file (str, optional): .json file to write smoothed results
-        options (ClassificationSmoothingOptions, optional): see
+        options (ClassificationSmoothingOptions, optional): see
             ClassificationSmoothingOptions for details.
-
+
     Returns:
         dict: MegaDetector-results-formatted dict, identical to what's written to
             [output_file] if [output_file] is not None.
     """
-
+
     ## Input validation
-
+
     if options is None:
         options = ClassificationSmoothingOptions()
-
+
     r = _prepare_results_for_smoothing(input_file, options)
     d = r['d']
     other_category_ids = r['other_category_ids']
     classification_descriptions_clean = r['classification_descriptions_clean']
     classification_descriptions = r['classification_descriptions']
-
-
+
+
     ## Make a list of images appearing in each sequence
-
+
     if isinstance(cct_sequence_information,list):
         image_info = cct_sequence_information
     elif isinstance(cct_sequence_information,str):
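A sketch of calling the sequence-level variant, assuming sequence IDs are already available. Per the docstring above, cct_sequence_information may be just the 'images' list from a CCT-formatted dict, i.e. records carrying 'file_name' and 'seq_id' (the two fields the loop below reads). Filenames and IDs here are hypothetical.

```python
# Just the 'images' list from a CCT dict; only 'file_name' and 'seq_id' are read
image_info = [
    {'file_name': 'cam01/0001.jpg', 'seq_id': 'seq_000'},
    {'file_name': 'cam01/0002.jpg', 'seq_id': 'seq_000'},
    {'file_name': 'cam01/0003.jpg', 'seq_id': 'seq_001'},
]

sequence_smoothed = smooth_classification_results_sequence_level(
    input_file='md-results-image-smoothed.json',
    cct_sequence_information=image_info,
    output_file='md-results-sequence-smoothed.json')
```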
@@ -931,77 +966,77 @@ def smooth_classification_results_sequence_level(input_file,
     else:
         assert isinstance(cct_sequence_information,dict)
         image_info = cct_sequence_information['images']
-
+
     sequence_to_image_filenames = defaultdict(list)
-
+
     # im = image_info[0]
     for im in tqdm(image_info):
-        sequence_to_image_filenames[im['seq_id']].append(im['file_name'])
+        sequence_to_image_filenames[im['seq_id']].append(im['file_name'])
     del image_info
-
+
     image_fn_to_classification_results = {}
     for im in d['images']:
         fn = im['file']
         assert fn not in image_fn_to_classification_results
         image_fn_to_classification_results[fn] = im
-
-
+
+
     ## Smoothing
-
+
     n_other_classifications_changed = 0
     n_other_sequences_changed = 0
     n_taxonomic_sequences_changed = 0
     n_within_family_sequences_changed = 0
-
+
     n_detections_flipped = 0
     n_sequences_changed = 0
-    n_taxonomic_classification_changes = 0
-    n_within_family_changes = 0
-
+    n_taxonomic_classification_changes = 0
+    n_within_family_changes = 0
+
     # sequence_id = list(sequence_to_image_filenames.keys())[0]
     for sequence_id in sequence_to_image_filenames.keys():

         image_filenames_this_sequence = sequence_to_image_filenames[sequence_id]
-
+
         # if 'file' in image_filenames_this_sequence:
         #     from IPython import embed; embed()
-
+
         detections_this_sequence = []
         for image_filename in image_filenames_this_sequence:
             im = image_fn_to_classification_results[image_filename]
             if 'detections' not in im or im['detections'] is None:
                 continue
             detections_this_sequence.extend(im['detections'])
-
+
             # Temporarily add image filenames to every detection,
             # for debugging
             for det in im['detections']:
                 det['image_filename'] = im['file']
-
+
         if len(detections_this_sequence) == 0:
             continue
-
+
         r = _smooth_classifications_for_list_of_detections(
-            detections=detections_this_sequence,
-            options=options,
+            detections=detections_this_sequence,
+            options=options,
             other_category_ids=other_category_ids,
-            classification_descriptions=classification_descriptions,
+            classification_descriptions=classification_descriptions,
             classification_descriptions_clean=classification_descriptions_clean)
-
+
         if r is None:
             continue
-
+
         n_detections_flipped_this_sequence = r['n_detections_flipped_this_image']
         n_other_classifications_changed_this_sequence = \
             r['n_other_classifications_changed_this_image']
         n_taxonomic_changes_this_sequence = r['n_taxonomic_changes_this_image']
         n_within_family_changes_this_sequence = r['n_within_family_smoothing_changes']
-
+
         n_detections_flipped += n_detections_flipped_this_sequence
         n_other_classifications_changed += n_other_classifications_changed_this_sequence
         n_taxonomic_classification_changes += n_taxonomic_changes_this_sequence
         n_within_family_changes += n_within_family_changes_this_sequence
-
+
         if n_detections_flipped_this_sequence > 0:
             n_sequences_changed += 1
         if n_other_classifications_changed_this_sequence > 0:
@@ -1010,40 +1045,40 @@ def smooth_classification_results_sequence_level(input_file,
             n_taxonomic_sequences_changed += 1
         if n_within_family_changes_this_sequence > 0:
             n_within_family_sequences_changed += 1
-
+
     # ...for each sequence
-
+
     print('Classification smoothing: changed {} detections in {} sequences'.format(
         n_detections_flipped,n_sequences_changed))
-
+
     print('"Other" smoothing: changed {} detections in {} sequences'.format(
         n_other_classifications_changed,n_other_sequences_changed))
-
+
     print('Taxonomic smoothing: changed {} detections in {} sequences'.format(
         n_taxonomic_classification_changes,n_taxonomic_sequences_changed))

     print('Within-family smoothing: changed {} detections in {} sequences'.format(
         n_within_family_changes,n_within_family_sequences_changed))
-
-
+
+
     ## Clean up debug information
-
+
     for im in d['images']:
         if 'detections' not in im or im['detections'] is None:
             continue
         for det in im['detections']:
             if 'image_filename' in det:
                 del det['image_filename']
-
+

     ## Write output
-
-    if output_file is not None:
+
+    if output_file is not None:
         print('Writing sequence-smoothed classification results to {}'.format(
-            output_file))
+            output_file))
         with open(output_file,'w') as f:
             json.dump(d,f,indent=1)
-
+
     return d

 # ...smooth_classification_results_sequence_level(...)
@@ -1058,14 +1093,14 @@ def restrict_to_taxa_list(taxa_list,
     """
     Given a prediction file in MD .json format, likely without having had
     a geofence applied, apply a custom taxa list.
-
+
     Args:
         taxa_list (str or list): list of latin names, or a text file containing
             a list of latin names. Optionally may contain a second (comma-delimited)
             column containing common names, used only for debugging. Latin names
             must exist in the SpeciesNet taxonomy.
-        speciesnet_taxonomy_file (str): taxonomy filename, in the same format used for
-            model release (with 7-token taxonomy entries)
+        speciesnet_taxonomy_file (str): taxonomy filename, in the same format used for
+            model release (with 7-token taxonomy entries)
         input_file (str): .json file to read, in MD format. This can be None, in which
             case this function just validates [taxa_list].
         output_file (str): .json file to write, in MD format
@@ -1074,22 +1109,22 @@ def restrict_to_taxa_list(taxa_list,
             For example, if only a single felid species is allowed, should other
             felid predictions be mapped to that species, as opposed to being mapped
             to the family?
-
-            field that summarizes each image's classifications prior to taxonomic
+        add_pre_filtering_description (bool, optional): should we add a new metadata
+            field that summarizes each image's classifications prior to taxonomic
             restriction?
     """

     ##%% Read target taxa list
-
+
     if isinstance(taxa_list,str):
         assert os.path.isfile(taxa_list), \
             'Could not find taxa list file {}'.format(taxa_list)
         with open(taxa_list,'r') as f:
             taxa_list = f.readlines()
-
+
     taxa_list = [s.strip().lower() for s in taxa_list]
     taxa_list = [s for s in taxa_list if len(s) > 0]
-
+
     target_latin_to_common = {}
     for s in taxa_list:
         if s.strip().startswith('#'):
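The parser above lowercases each line, drops blanks, skips '#' comments, and splits an optional comma-delimited common-name column. A plausible allow-list under those rules (species chosen for illustration, not taken from any real project), passed with input_file=None, which per the docstring above just validates the list; the taxonomy filename is hypothetical.

```python
taxa_list = [
    '# Allow-list for a hypothetical deployment',
    'odocoileus virginianus,white-tailed deer',
    'canis latrans,coyote',
    'meleagris gallopavo',  # the common-name column is optional
]

# input_file=None --> validate [taxa_list] against the taxonomy and return
restrict_to_taxa_list(taxa_list=taxa_list,
                      speciesnet_taxonomy_file='speciesnet_taxonomy.csv',
                      input_file=None,
                      output_file=None)
```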
@@ -1105,38 +1140,38 @@ def restrict_to_taxa_list(taxa_list,
             common_name = None
         assert binomial_name not in target_latin_to_common
         target_latin_to_common[binomial_name] = common_name
-
+

     ##%% Read taxonomy file
-
+
     with open(speciesnet_taxonomy_file,'r') as f:
         speciesnet_taxonomy_list = f.readlines()
     speciesnet_taxonomy_list = [s.strip() for s in \
                                 speciesnet_taxonomy_list if len(s.strip()) > 0]
-
+
     # Maps the latin name of every taxon to the corresponding full taxon string
     #
     # For species, the key is a binomial name
     speciesnet_latin_name_to_taxon_string = {}
     speciesnet_common_name_to_taxon_string = {}
-
+
     def _insert_taxonomy_string(s):
-
+
         tokens = s.split(';')
         assert len(tokens) == 7
-
+
         guid = tokens[0] # noqa
         class_name = tokens[1]
         order = tokens[2]
         family = tokens[3]
         genus = tokens[4]
-        species = tokens[5]
+        species = tokens[5]
         common_name = tokens[6]
-
+
         if len(class_name) == 0:
             assert common_name in ('animal','vehicle','blank')
             return
-
+
         if len(species) > 0:
             assert all([len(s) > 0 for s in [genus,family,order]])
             binomial_name = genus + ' ' + species
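For reference, a worked example of the 7-token format that `_insert_taxonomy_string` asserts on: GUID;class;order;family;genus;species;common name. The row below uses illustrative values, not a quote from the real taxonomy file.

```python
row = 'fake_guid;mammalia;cetartiodactyla;cervidae;odocoileus;virginianus;white-tailed deer'
tokens = row.split(';')
assert len(tokens) == 7

guid, class_name, order, family, genus, species, common_name = tokens

# Species-level entries are keyed by binomial name, as in the code above
binomial_name = genus + ' ' + species
assert binomial_name == 'odocoileus virginianus'
```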
@@ -1156,43 +1191,43 @@ def restrict_to_taxa_list(taxa_list,
         else:
             if class_name not in speciesnet_latin_name_to_taxon_string:
                 speciesnet_latin_name_to_taxon_string[class_name] = s
-
+
         if len(common_name) > 0:
             if common_name not in speciesnet_common_name_to_taxon_string:
                 speciesnet_common_name_to_taxon_string[common_name] = s
-
+
     for s in speciesnet_taxonomy_list:
-
+
         _insert_taxonomy_string(s)
-
-
+
+
     ##%% Make sure all parent taxa are represented in the taxonomy
-
+
     # In theory any taxon that appears as the parent of another taxon should
     # also be in the taxonomy, but this isn't always true, so we fix it here.
-
+
     new_taxon_string_to_missing_tokens = defaultdict(list)
-
+
     # latin_name = next(iter(speciesnet_latin_name_to_taxon_string.keys()))
     for latin_name in speciesnet_latin_name_to_taxon_string.keys():
-
+
         if 'no cv result' in latin_name:
             continue
-
+
         taxon_string = speciesnet_latin_name_to_taxon_string[latin_name]
         tokens = taxon_string.split(';')
-
+
         # Don't process GUID, species, or common name
         # i_token = 6
         for i_token in range(1,len(tokens)-2):
-
-            test_token = tokens[i_token]
+
+            test_token = tokens[i_token]
             if len(test_token) == 0:
                 continue
-
+
             # Do we need to make up a taxon for this token?
             if test_token not in speciesnet_latin_name_to_taxon_string:
-
+
                 new_tokens = [''] * 7
                 new_tokens[0] = 'fake_guid'
                 for i_copy_token in range(1,i_token+1):
@@ -1202,28 +1237,28 @@ def restrict_to_taxa_list(taxa_list,
                 new_taxon_string = ';'.join(new_tokens)
                 # assert new_taxon_string not in new_taxon_strings
                 new_taxon_string_to_missing_tokens[new_taxon_string].append(test_token)
-
+
         # ...for each token
-
+
     # ...for each taxon
-
+
     print('Found {} taxa that need to be inserted to make the taxonomy valid:\n'.format(
         len(new_taxon_string_to_missing_tokens)))
-
+
     new_taxon_string_to_missing_tokens = \
         sort_dictionary_by_key(new_taxon_string_to_missing_tokens)
     for taxon_string in new_taxon_string_to_missing_tokens:
         missing_taxa = ','.join(new_taxon_string_to_missing_tokens[taxon_string])
         print('{} ({})'.format(taxon_string,missing_taxa))
-
+
     for new_taxon_string in new_taxon_string_to_missing_tokens:
         _insert_taxonomy_string(new_taxon_string)
-
-
+
+
     ##%% Make sure all species on the allow-list are in the taxonomy
-
+
     n_failed_mappings = 0
-
+
     for target_taxon_latin_name in target_latin_to_common.keys():
         if target_taxon_latin_name not in speciesnet_latin_name_to_taxon_string:
             common_name = target_latin_to_common[target_taxon_latin_name]
@@ -1234,99 +1269,99 @@ def restrict_to_taxa_list(taxa_list,
                     speciesnet_common_name_to_taxon_string[common_name])
             print(s)
             n_failed_mappings += 1
-
+
     if n_failed_mappings > 0:
         raise ValueError('Cannot continue with geofence generation')
-
-
+
+
     ##%% For the allow-list, map each parent taxon to a set of allowable child taxa
-
-    # Maps parent names to all allowed child names, or None if this is the
+
+    # Maps parent names to all allowed child names, or None if this is the
     # lowest-level allowable taxon on this path
     allowed_parent_taxon_to_child_taxa = defaultdict(set)
-
+
     # latin_name = next(iter(target_latin_to_common.keys()))
     for latin_name in target_latin_to_common:
-
+
         taxon_string = speciesnet_latin_name_to_taxon_string[latin_name]
         tokens = taxon_string.split(';')
         assert len(tokens) == 7
-
+
         # Remove GUID and common name
         #
         # This is now always class/order/family/genus/species
         tokens = tokens[1:-1]
-
+
         child_taxon = None
-
+
         # If this is a species
         if len(tokens[-1]) > 0:
             binomial_name = tokens[-2] + ' ' + tokens[-1]
             assert binomial_name == latin_name
             allowed_parent_taxon_to_child_taxa[binomial_name].add(None)
             child_taxon = binomial_name
-
-        # The first candidate parent is the genus
+
+        # The first candidate parent is the genus
         parent_token_index = len(tokens) - 2

         while(parent_token_index >= 0):
-
+
             parent_taxon = tokens[parent_token_index]
             allowed_parent_taxon_to_child_taxa[parent_taxon].add(child_taxon)
             child_taxon = parent_taxon
-            parent_token_index -= 1
-
+            parent_token_index -= 1
+
     # ...for each allowed latin name
-
+
     allowed_parent_taxon_to_child_taxa = \
         sort_dictionary_by_key(allowed_parent_taxon_to_child_taxa)
-
-
+
+
     ##%% If we were just validating the custom taxa file, we're done

     if input_file is None:
         print('Finished validating custom taxonomy list')
         return
-
+

     ##%% Map all predictions that exist in this dataset...
-
+
     # ...to the prediction we should generate.
-
+
     with open(input_file,'r') as f:
         input_data = json.load(f)
-
+
     input_category_id_to_common_name = input_data['classification_categories'] #noqa
     input_category_id_to_taxonomy_string = \
         input_data['classification_category_descriptions']
-
+
     input_category_id_to_output_taxon_string = {}
-
+
     # input_category_id = next(iter(input_category_id_to_taxonomy_string.keys()))
     for input_category_id in input_category_id_to_taxonomy_string.keys():
-
+
         input_taxon_string = input_category_id_to_taxonomy_string[input_category_id]
         input_taxon_tokens = input_taxon_string.split(';')
         assert len(input_taxon_tokens) == 7
-
+
         # Don't mess with blank/no-cv-result/animal/human
         if (input_taxon_string in non_taxonomic_prediction_strings) or \
            (input_taxon_string == human_prediction_string):
             input_category_id_to_output_taxon_string[input_category_id] = \
                 input_taxon_string
             continue
-
+
         # Remove GUID and common name
-
+
         # This is now always class/order/family/genus/species
         input_taxon_tokens = input_taxon_tokens[1:-1]
-
+
         test_index = len(input_taxon_tokens) - 1
         target_taxon = None
-
+
         # Start at the species level, and see whether each taxon is allowed
         while((test_index >= 0) and (target_taxon is None)):
-
+
             # Species are represented as binomial names
             if (test_index == (len(input_taxon_tokens) - 1)) and \
                (len(input_taxon_tokens[-1]) > 0):
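To make the walk-up concrete, here is a self-contained re-expression of the loop above for a one-species allow-list; the lineage tokens are illustrative. Each taxon maps to its allowed children, with None marking the lowest-level allowed taxon on the path.

```python
from collections import defaultdict

# class/order/family/genus/species tokens for one allowed species (illustrative)
tokens = ['mammalia', 'cetartiodactyla', 'cervidae', 'odocoileus', 'virginianus']

allowed_parent_taxon_to_child_taxa = defaultdict(set)

# Species level: the binomial name is the lowest allowed taxon on this path
binomial_name = tokens[-2] + ' ' + tokens[-1]
allowed_parent_taxon_to_child_taxa[binomial_name].add(None)
child_taxon = binomial_name

# Walk up from the genus to the class, recording each parent/child edge
parent_token_index = len(tokens) - 2
while parent_token_index >= 0:
    parent_taxon = tokens[parent_token_index]
    allowed_parent_taxon_to_child_taxa[parent_taxon].add(child_taxon)
    child_taxon = parent_taxon
    parent_token_index -= 1

# Result:
# {'odocoileus virginianus': {None},
#  'odocoileus': {'odocoileus virginianus'},
#  'cervidae': {'odocoileus'},
#  'cetartiodactyla': {'cervidae'},
#  'mammalia': {'cetartiodactyla'}}
```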
@@ -1334,27 +1369,27 @@ def restrict_to_taxa_list(taxa_list,
                     input_taxon_tokens[-2] + ' ' + input_taxon_tokens[-1]
             else:
                 test_taxon_name = input_taxon_tokens[test_index]
-
+
             # If we haven't yet found the level at which this taxon is non-empty,
             # keep going up
-            if len(test_taxon_name) == 0:
+            if len(test_taxon_name) == 0:
                 test_index -= 1
                 continue
-
+
             assert test_taxon_name in speciesnet_latin_name_to_taxon_string
-
+
             # Is this taxon allowed according to the custom species list?
             if test_taxon_name in allowed_parent_taxon_to_child_taxa:
-
+
                 allowed_child_taxa = allowed_parent_taxon_to_child_taxa[test_taxon_name]
                 assert allowed_child_taxa is not None
-
-                # If this is the lowest-level allowable token or there is not a
+
+                # If this is the lowest-level allowable token or there is not a
                 # unique child, don't walk any further, even if walking down
                 # is enabled.
                 if (None in allowed_child_taxa):
                     assert len(allowed_child_taxa) == 1
-
+
                 if (None in allowed_child_taxa) or (len(allowed_child_taxa) > 1):
                     target_taxon = test_taxon_name
                 elif not allow_walk_down:
@@ -1370,72 +1405,72 @@ def restrict_to_taxa_list(taxa_list,
                     allowed_child_taxa = \
                         allowed_parent_taxon_to_child_taxa[candidate_taxon]
                     target_taxon = candidate_taxon
-
+
             # ...if this is an allowed taxon
-
+
             test_index -= 1
-
+
         # ...for each token
-
+
         if target_taxon is None:
-            output_taxon_string = animal_prediction_string
+            output_taxon_string = animal_prediction_string
         else:
             output_taxon_string = speciesnet_latin_name_to_taxon_string[target_taxon]
-        input_category_id_to_output_taxon_string[input_category_id] = output_taxon_string
-
+        input_category_id_to_output_taxon_string[input_category_id] = output_taxon_string
+
     # ...for each category
-
-
+
+
     ##%% Build the new tables
-
+
     input_category_id_to_output_category_id = {}
     output_taxon_string_to_category_id = {}
     output_category_id_to_common_name = {}
-
+
     for input_category_id in input_category_id_to_output_taxon_string:
-
+
         original_common_name = \
             input_category_id_to_common_name[input_category_id]
         original_taxon_string = \
             input_category_id_to_taxonomy_string[input_category_id]
         output_taxon_string = \
             input_category_id_to_output_taxon_string[input_category_id]
-
+
         output_common_name = output_taxon_string.split(';')[-1]
-
+
         # Do we need to create a new output category?
         if output_taxon_string not in output_taxon_string_to_category_id:
             output_category_id = str(len(output_taxon_string_to_category_id))
             output_taxon_string_to_category_id[output_taxon_string] = \
                 output_category_id
             output_category_id_to_common_name[output_category_id] = \
-                output_common_name
+                output_common_name
         else:
             output_category_id = \
                 output_taxon_string_to_category_id[output_taxon_string]
-
+
         input_category_id_to_output_category_id[input_category_id] = \
             output_category_id
-
+
         if False:
             print('Mapping {} ({}) to:\n{} ({})\n'.format(
                 original_common_name,original_taxon_string,
                 output_common_name,output_taxon_string))
-        if False:
+        if False:
             print('Mapping {} to {}'.format(
                 original_common_name,output_common_name,))
-
+
     # ...for each category
-
-
+
+
     ##%% Remap all category labels
-
+
     assert len(set(output_taxon_string_to_category_id.keys())) == \
         len(set(output_taxon_string_to_category_id.values()))
-
+
     output_category_id_to_taxon_string = \
         invert_dictionary(output_taxon_string_to_category_id)
-
+
     with open(input_file,'r') as f:
         output_data = json.load(f)

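The category-remapping loop above walks each prediction from the species level upward until it hits an allowed taxon, falling back to the generic animal prediction when no ancestor is allowed at all. A simplified sketch of that walk follows; it ignores the allow_walk_down branch, so it's an approximation of the loop's default behavior, not the exact logic, and the lineages are illustrative.

```python
def _resolve_to_allowed_taxon(tokens, allowed_parent_taxon_to_child_taxa):
    """tokens is [class, order, family, genus, species]; returns the nearest
    allowed taxon at or above this prediction, or None (mapped to 'animal')."""
    test_index = len(tokens) - 1
    while test_index >= 0:
        # Species are represented as binomial names
        if test_index == len(tokens) - 1 and len(tokens[-1]) > 0:
            test_taxon_name = tokens[-2] + ' ' + tokens[-1]
        else:
            test_taxon_name = tokens[test_index]
        if len(test_taxon_name) > 0 and \
           test_taxon_name in allowed_parent_taxon_to_child_taxa:
            return test_taxon_name
        test_index -= 1
    return None

# Deer-only mapping from the previous sketch, inlined for self-containment
allowed = {
    'odocoileus virginianus': {None},
    'odocoileus': {'odocoileus virginianus'},
    'cervidae': {'odocoileus'},
    'cetartiodactyla': {'cervidae'},
    'mammalia': {'cetartiodactyla'},
}

# A mule deer prediction rolls up to the allowed genus...
assert _resolve_to_allowed_taxon(
    ['mammalia', 'cetartiodactyla', 'cervidae', 'odocoileus', 'hemionus'],
    allowed) == 'odocoileus'

# ...while a bird prediction finds no allowed ancestor at all
assert _resolve_to_allowed_taxon(
    ['aves', 'strigiformes', 'strigidae', 'bubo', 'virginianus'],
    allowed) is None
```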
@@ -1447,7 +1482,7 @@ def restrict_to_taxa_list(taxa_list,

         if 'detections' not in im or im['detections'] is None:
             continue
-
+
         # Possibly prepare a pre-filtering description
         pre_filtering_description = None
         if classification_descriptions is not None and add_pre_filtering_description:
@@ -1462,16 +1497,16 @@ def restrict_to_taxa_list(taxa_list,
                 classification[0] = \
                     input_category_id_to_output_category_id[classification[0]]

-    # ...for each image
-
+    # ...for each image
+
     output_data['classification_categories'] = output_category_id_to_common_name
     output_data['classification_category_descriptions'] = \
         output_category_id_to_taxon_string
-
-
+
+
     ##%% Write output
-
+
     with open(output_file,'w') as f:
         json.dump(output_data,f,indent=1)
-
+
     # ...def restrict_to_taxa_list(...)