megadetector 5.0.24__py3-none-any.whl → 5.0.26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- megadetector/data_management/cct_json_utils.py +15 -2
- megadetector/data_management/coco_to_yolo.py +53 -31
- megadetector/data_management/databases/combine_coco_camera_traps_files.py +7 -3
- megadetector/data_management/databases/integrity_check_json_db.py +2 -2
- megadetector/data_management/lila/add_locations_to_island_camera_traps.py +73 -69
- megadetector/data_management/lila/add_locations_to_nacti.py +114 -110
- megadetector/data_management/lila/generate_lila_per_image_labels.py +2 -2
- megadetector/data_management/lila/test_lila_metadata_urls.py +21 -10
- megadetector/data_management/remap_coco_categories.py +60 -11
- megadetector/data_management/{wi_to_md.py → speciesnet_to_md.py} +2 -2
- megadetector/data_management/yolo_to_coco.py +45 -15
- megadetector/detection/run_detector.py +1 -0
- megadetector/detection/run_detector_batch.py +5 -4
- megadetector/postprocessing/classification_postprocessing.py +788 -524
- megadetector/postprocessing/compare_batch_results.py +176 -9
- megadetector/postprocessing/create_crop_folder.py +420 -0
- megadetector/postprocessing/load_api_results.py +4 -1
- megadetector/postprocessing/md_to_coco.py +1 -1
- megadetector/postprocessing/postprocess_batch_results.py +158 -44
- megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +3 -8
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +2 -2
- megadetector/postprocessing/separate_detections_into_folders.py +20 -4
- megadetector/postprocessing/subset_json_detector_output.py +180 -15
- megadetector/postprocessing/validate_batch_results.py +13 -5
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +6 -6
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +3 -58
- megadetector/taxonomy_mapping/species_lookup.py +45 -2
- megadetector/utils/ct_utils.py +76 -3
- megadetector/utils/directory_listing.py +4 -4
- megadetector/utils/gpu_test.py +21 -3
- megadetector/utils/md_tests.py +142 -49
- megadetector/utils/path_utils.py +342 -19
- megadetector/utils/wi_utils.py +1286 -212
- megadetector/visualization/visualization_utils.py +16 -4
- megadetector/visualization/visualize_db.py +1 -1
- megadetector/visualization/visualize_detector_output.py +1 -4
- {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info}/METADATA +6 -3
- {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info}/RECORD +41 -40
- {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info}/WHEEL +1 -1
- {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info/licenses}/LICENSE +0 -0
- {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info}/top_level.txt +0 -0
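Note: the largest change in this release is the rewrite of classification_postprocessing.py shown below, which merges the old image-level and sequence-level option classes into a single ClassificationSmoothingOptions class. A minimal sketch of calling the new API, based only on the signatures visible in the diff (the file paths are placeholders, not from this diff):

    from megadetector.postprocessing.classification_postprocessing import (
        ClassificationSmoothingOptions,
        smooth_classification_results_image_level)

    options = ClassificationSmoothingOptions()
    options.classification_confidence_threshold = 0.5  # the new default shown below

    # Writes smoothed results to output_file, and also returns the results dict
    smoothed_results = smooth_classification_results_image_level(
        input_file='md_results.json',             # placeholder path
        output_file='md_results_smoothed.json',   # placeholder path
        options=options)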
@@ -4,10 +4,10 @@ classification_postprocessing.py
 
 Functions for postprocessing species classification results, particularly:
 
-* Smoothing results within a sequence (a sequence that looks like deer/deer/deer/elk/deer/deer
-  is really just a deer)
 * Smoothing results within an image (an image with 700 cows and one deer is really just 701
   cows)
+* Smoothing results within a sequence (a sequence that looks like deer/deer/deer/elk/deer/deer
+  is really just a deer)
 
 """
 
@@ -20,183 +20,219 @@ from collections import defaultdict
 from tqdm import tqdm
 
 from megadetector.utils.ct_utils import is_list_sorted
+from megadetector.utils.wi_utils import clean_taxonomy_string
+from megadetector.utils.wi_utils import taxonomy_level_index
+from megadetector.utils.wi_utils import taxonomy_level_string_to_index
+from megadetector.utils.ct_utils import sort_dictionary_by_value
 
 
 #%% Options classes
 
-class
+class ClassificationSmoothingOptions:
     """
     Options used to parameterize smooth_classification_results_image_level()
+    and smooth_classification_results_sequence_level()
     """
 
     def __init__(self):
 
-        #: How many detections do we need
-        #:
-
+        #: How many detections do we need in a dominant category to overwrite
+        #: non-dominant classifications? This is irrelevant if
+        #: max_detections_nondominant_class <= 1.
+        self.min_detections_to_overwrite_secondary = 4
 
-        #: Even if we have a dominant class, if a non-dominant class has at least
-        #: in an image, leave them alone.
-
-
-        #:
+        #: Even if we have a dominant class, if a non-dominant class has at least
+        #: this many classifications in an image, leave them alone.
+        #:
+        #: If this is <= 1, we won't replace non-dominant, non-other classes
+        #: with the dominant class, even if there are 900 cows and 1 deer.
+        self.max_detections_nondominant_class = 1
+
+        #: How many detections do we need in a dominant category to overwrite
+        #: non-dominant classifications in the same family? If this is <= 0,
+        #: we'll skip this step. This option doesn't mean anything if
+        #: max_detections_nondominant_class_same_family <= 1.
+        self.min_detections_to_overwrite_secondary_same_family = 2
+
+        #: If we have this many classifications of a nondominant category,
+        #: we won't do same-family overwrites. <= 1 means "even if there are
+        #: a million deer, if there are two million moose, call all the deer
+        #: moose". This option doesn't mean anything if
+        #: min_detections_to_overwrite_secondary_same_family <= 0.
+        self.max_detections_nondominant_class_same_family = -1
+
+        #: If the dominant class has at least this many classifications, overwrite
+        #: "other" classifications with the dominant class
         self.min_detections_to_overwrite_other = 2
 
         #: Names to treat as "other" categories; can't be None, but can be empty
-        self.other_category_names = ['other']
-
-        #: What confidence threshold should we use for assessing the dominant category in an image?
-        self.classification_confidence_threshold = 0.6
-
-        #: Which classifications should we even bother over-writing?
-        self.classification_overwrite_threshold = 0.3
-
-        #: Detection confidence threshold for things we count when determining a dominant class
-        self.detection_confidence_threshold = 0.2
-
-        #: Which detections should we even bother over-writing?
-        self.detection_overwrite_threshold = 0.05
-
-
-class ClassificationSmoothingOptionsSequenceLevel:
-    """
-    Options used to parameterize smooth_classification_results_sequence_level()
-    """
-
-    def __init__(self):
-
-        #: Only process detections in this category
-        self.animal_detection_category = '1'
-
-        #: Treat category names on this list as "other", which can be flipped to common
-        #: categories.
-        self.other_category_names = set(['other'])
-
-        #: These are the only classes to which we're going to switch "other" classifications.
-        #:
-        #: Example:
-        #:
-        #: ['deer','elk','cow','canid','cat','bird','bear']
-        self.category_names_to_smooth_to = None
-
-        #: Only switch classifications to the dominant class if we see the dominant class at least
-        #: this many times
-        self.min_dominant_class_classifications_above_threshold_for_class_smoothing = 5 # 2
-
-        #: If we see more than this many of a class that are above threshold, don't switch those
-        #: classifications to the dominant class.
-        self.max_secondary_class_classifications_above_threshold_for_class_smoothing = 5
-
-        #: If the ratio between a dominant class and a secondary class count is greater than this,
-        #: regardless of the secondary class count, switch those classifications (i.e., ignore
-        #: max_secondary_class_classifications_above_threshold_for_class_smoothing).
         #:
-        #:
-        #:
+        #: "Other" classifications will be changed to the dominant category, regardless
+        #: of confidence, as long as there are at least min_detections_to_overwrite_other
+        #: examples of the dominant class. For example, cow/other will remain unchanged,
+        #: but cow/cow/other will become cow/cow/cow.
+        self.other_category_names = ['other','unknown','no cv result','animal','blank','mammal']
+
+        #: We're not even going to mess around with classifications below this threshold.
         #:
-        #:
+        #: We won't count them, we won't over-write them, they don't exist during the
+        #: within-image smoothing step.
+        self.classification_confidence_threshold = 0.5
+
+        #: We're not even going to mess around with detections below this threshold.
         #:
-        #:
+        #: We won't count them, we won't over-write them, they don't exist during the
+        #: within-image smoothing step.
+        self.detection_confidence_threshold = 0.15
+
+        #: If classification descriptions are present and appear to represent taxonomic
+        #: information, should we propagate classifications when lower-level taxa are more
+        #: common in an image? For example, if we see "carnivore/fox/fox/deer", should
+        #: we make that "fox/fox/fox/deer"?
+        self.propagate_classifications_through_taxonomy = True
+
+        #: When propagating classifications down through taxonomy levels, we have to
+        #: decide whether we prefer more frequent categories or more specific categories.
+        #: taxonomy_propagation_level_weight and taxonomy_propagation_count_weight
+        #: balance levels against counts in this process.
+        self.taxonomy_propagation_level_weight = 1.0
+
+        #: When propagating classifications down through taxonomy levels, we have to
+        #: decide whether we prefer more frequent categories or more specific categories.
+        #: taxonomy_propagation_level_weight and taxonomy_propagation_count_weight
+        #: balance levels against counts in this process.
         #:
-        #:
-        self.
-
-        #: If there are at least this many classifications for the dominant class in a sequence,
-        #: regardless of what that class is, convert all 'other' classifications (regardless of
-        #: confidence) to that class.
-        self.min_dominant_class_classifications_above_threshold_for_other_smoothing = 3 # 2
-
-        #: If there are at least this many classifications for the dominant class in a sequence,
-        #: regardless of what that class is, classify all previously-unclassified detections
-        #: as that class.
-        self.min_dominant_class_classifications_above_threshold_for_unclassified_smoothing = 3 # 2
+        #: With a very low default value, this just breaks ties.
+        self.taxonomy_propagation_count_weight = 0.01
 
-        #:
-
-        self.classification_confidence_threshold = 0.6
+        #: Should we record information about the state of labels prior to smoothing?
+        self.add_pre_smoothing_description = True
 
-        #:
-        #:
-        self.
+        #: When a dict (rather than a file) is passed to either smoothing function,
+        #: if this is True, we'll make a copy of the input dict before modifying.
+        self.modify_in_place = False
 
-        #:
-
-        self.flipped_class_confidence_value = 0.6
-
-        #: Confidence values to use when we change a detection's classification (the
-        #: original confidence value is irrelevant at that point) (for previously unclassified detections)
-        self.flipped_unclassified_confidence_value = 0.6
-
-        #: Only flip the class label unclassified detections if the detection confidence exceeds this threshold
-        self.min_detection_confidence_for_unclassified_flipping = 0.15
-
-        #: Only relevant when MegaDetector results are supplied as a dict rather than a file; determines
-        #: whether smoothing happens in place.
-        self.modify_in_place = True
-
-# ...class ClassificationSmoothingOptionsSequenceLevel()
+        #: Debug options
+        self.break_at_image = None
 
+
+#%% Utility functions
+
+def _results_for_sequence(images_this_sequence,filename_to_results):
+    """
+    Fetch MD results for every image in this sequence, based on the 'file_name' field
+    """
 
-
+    results_this_sequence = []
+    for im in images_this_sequence:
+        fn = im['file_name']
+        results_this_image = filename_to_results[fn]
+        assert isinstance(results_this_image,dict)
+        results_this_sequence.append(results_this_image)
+
+    return results_this_sequence
+
+
+def _sort_images_by_time(images):
+    """
+    Returns a copy of [images], sorted by the 'datetime' field (ascending).
+    """
+    return sorted(images, key = lambda im: im['datetime'])
 
-
+
+def _count_detections_by_category(detections,options):
     """
-
-
+    Count the number of instances of each category in the detections list
+    [detections] that have an above-threshold detection. Sort results in descending
+    order by count. Returns a dict mapping category ID --> count. If no detections
+    are above threshold, returns an empty dict.
 
-
-
+    Assumes that if the 'classifications' field is present for a detection, it has
+    length 1, i.e. that non-top classifications have already been removed.
+    """
 
-
-    [options.classification_confidence_threshold], which in practice means we're only
-    looking at one category per detection.
+    category_to_count = defaultdict(int)
 
-
-
-
-
+    for det in detections:
+        if ('classifications' in det) and (det['conf'] >= options.detection_confidence_threshold):
+            assert len(det['classifications']) == 1
+            c = det['classifications'][0]
+            if c[1] >= options.classification_confidence_threshold:
+                category_to_count[c[0]] += 1
+
+    category_to_count = {k: v for k, v in sorted(category_to_count.items(),
+                                                 key=lambda item: item[1],
+                                                 reverse=True)}
 
-
-
+    return category_to_count
+
+
+def _get_description_string(category_to_count,classification_descriptions):
+    """
+    Return a string summarizing the image content according to [category_to_count].
+    """
 
-
+    category_strings = []
+    # category_id = next(iter(category_to_count))
+    for category_id in category_to_count:
+        category_description = classification_descriptions[category_id]
+        tokens = category_description.split(';')
+        assert len(tokens) == 7
+        category_name = tokens[-1]
+        if len(category_name) == 0:
+            category_name = 'undefined category'
+        count = category_to_count[category_id]
+        category_string = '{} ({})'.format(category_name,count)
+        category_strings.append(category_string)
 
-
-
-
-
-
-
-
-    dict: MegaDetector-results-formatted dict, identical to what's written to
-    [output_file] if [output_file] is not None.
+    return ', '.join(category_strings)
+
+
+def _print_counts_with_names(category_to_count,classification_descriptions):
+    """
+    Print a list of classification categories with counts, based on the name --> count
+    dict [category_to_count]
     """
 
-
-
-
-
-
-
+    for category_id in category_to_count:
+        category_name = classification_descriptions[category_id]
+        count = category_to_count[category_id]
+        print('{}: {} ({})'.format(category_id,category_name,count))
+
+
+def _prepare_results_for_smoothing(input_file,options):
+    """
+    Load results from [input_file] if necessary, prepare category descriptions
+    for smoothing. Adds pre-smoothing descriptions to every image if the options
+    say we're supposed to do that.
+    """
 
+    if isinstance(input_file,str):
+        with open(input_file,'r') as f:
+            print('Loading results from:\n{}'.format(input_file))
+            d = json.load(f)
+    else:
+        assert isinstance(input_file,dict)
+        if options.modify_in_place:
+            d = input_file
+        else:
+            print('modify_in_place is False, copying the input before modifying')
+            d = copy.deepcopy(input_file)
+
+
+    ## Category processing
+
     category_name_to_id = {d['classification_categories'][k]:k for k in d['classification_categories']}
     other_category_ids = []
     for s in options.other_category_names:
         if s in category_name_to_id:
             other_category_ids.append(category_name_to_id[s])
-
-            print('Warning: "other" category {} not present in file {}'.format(
-                s,input_file))
-
-    n_other_classifications_changed = 0
-    n_other_images_changed = 0
-
-    n_detections_flipped = 0
-    n_images_changed = 0
-
+
     # Before we do anything else, get rid of everything but the top classification
-    # for each detection
-
+    # for each detection, and remove the 'classifications' field from detections with
+    # no classifications.
+    for im in tqdm(d['images']):
+
         if 'detections' not in im or im['detections'] is None or len(im['detections']) == 0:
             continue
 
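Note: a small worked example (hypothetical data) of the _count_detections_by_category() helper added above. Each classification is a [category_id, confidence] pair, and detections carry at most one classification:

    detections = [
        {'conf': 0.9, 'classifications': [['1', 0.8]]},  # counted as category '1'
        {'conf': 0.9, 'classifications': [['1', 0.7]]},  # counted as category '1'
        {'conf': 0.9, 'classifications': [['2', 0.6]]},  # counted as category '2'
        {'conf': 0.1, 'classifications': [['2', 0.9]]},  # ignored: detection below 0.15
        {'conf': 0.9, 'classifications': [['2', 0.3]]},  # ignored: classification below 0.5
    ]

    # With the new default thresholds (detection 0.15, classification 0.5), the
    # helper would return {'1': 2, '2': 1}, sorted in descending order by count.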
@@ -204,7 +240,10 @@ def smooth_classification_results_image_level(input_file,output_file=None,options=None):
 
         for det in detections:
 
-            if 'classifications' not in det
+            if 'classifications' not in det:
+                continue
+            if len(det['classifications']) == 0:
+                del det['classifications']
                 continue
 
             classification_confidence_values = [c[1] for c in det['classifications']]
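Note: the added branch above means that after this loop, a 'classifications' field, when present, is guaranteed non-empty; a tiny illustration with a hypothetical detection dict:

    det = {'category': '1', 'conf': 0.8, 'classifications': []}
    if 'classifications' in det and len(det['classifications']) == 0:
        del det['classifications']
    assert 'classifications' not in det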
@@ -215,271 +254,565 @@ def smooth_classification_results_image_level(input_file,output_file=None,options=None):
 
     # ...for each image
 
-
-
+
+    ## Clean up classification descriptions so we can test taxonomic relationships
+    ## by substring testing.
+
+    classification_descriptions_clean = None
+    classification_descriptions = None
+
+    if 'classification_category_descriptions' in d:
+        classification_descriptions = d['classification_category_descriptions']
+        classification_descriptions_clean = {}
+        # category_id = next(iter(classification_descriptions))
+        for category_id in classification_descriptions:
+            classification_descriptions_clean[category_id] = \
+                clean_taxonomy_string(classification_descriptions[category_id]).strip(';').lower()
+
+
+    ## Optionally add pre-smoothing descriptions to every image
+
+    if options.add_pre_smoothing_description:
 
-
-
+        for im in tqdm(d['images']):
+
+            if 'detections' not in im or im['detections'] is None or len(im['detections']) == 0:
+                continue
+
+            detections = im['detections']
+            category_to_count = _count_detections_by_category(detections, options)
+
+            im['pre_smoothing_description'] = \
+                _get_description_string(category_to_count, classification_descriptions)
+
+
+    return {
+        'd':d,
+        'other_category_ids':other_category_ids,
+        'classification_descriptions_clean':classification_descriptions_clean,
+        'classification_descriptions':classification_descriptions
+    }
+
+# ...def _prepare_results_for_smoothing(...)
+
+
+def _smooth_classifications_for_list_of_detections(detections,
+                                                   options,
+                                                   other_category_ids,
+                                                   classification_descriptions,
+                                                   classification_descriptions_clean):
+    """
+    Smooth classifications for a list of detections, which may have come from a single
+    image, or may represent an entire sequence.
+
+    Returns None if no changes are made, else a dict.
+
+    classification_descriptions_clean should be semicolon-delimited taxonomic strings
+    from which common names and GUIDs have already been removed.
+
+    Assumes there is only one classification per detection, i.e. that non-top classifications
+    have already been removed.
+    """
+
+    ## Count the number of instances of each category in this image
+
+    category_to_count = _count_detections_by_category(detections, options)
+    # _print_counts_with_names(category_to_count,classification_descriptions)
+    # _get_description_string(category_to_count, classification_descriptions)
 
-
+    if len(category_to_count) <= 1:
+        return None
+
+    keys = list(category_to_count.keys())
 
-
+    # Handle a quirky special case: if the most common category is "other" and
+    # it's "tied" with the second-most-common category, swap them
+    if (len(keys) > 1) and \
+       (keys[0] in other_category_ids) and \
+       (keys[1] not in other_category_ids) and \
+       (category_to_count[keys[0]] == category_to_count[keys[1]]):
+        keys[1], keys[0] = keys[0], keys[1]
+
+    max_count = category_to_count[keys[0]]
+    most_common_category = keys[0]
+    del keys
+
+
+    ## Debug tools
+
+    verbose_debug_enabled = False
+
+    if options.break_at_image is not None:
         for det in detections:
-            if
-
-
-                        category_to_count[c[0]] += 1
-                # ...for each classification
-            # ...if there are classifications for this detection
-        # ...for each detection
-
-        if len(category_to_count) <= 1:
-            continue
-
-        category_to_count = {k: v for k, v in sorted(category_to_count.items(),
-                                                     key=lambda item: item[1],
-                                                     reverse=True)}
-
-        keys = list(category_to_count.keys())
-
-        # Handle a quirky special case: if the most common category is "other" and
-        # it's "tied" with the second-most-common category, swap them
-        if (len(keys) > 1) and \
-            (keys[0] in other_category_ids) and \
-            (keys[1] not in other_category_ids) and \
-            (category_to_count[keys[0]] == category_to_count[keys[1]]):
-            keys[1], keys[0] = keys[0], keys[1]
-
-        max_count = category_to_count[keys[0]]
-        # secondary_count = category_to_count[keys[1]]
-        # The 'secondary count' is the most common non-other class
-        secondary_count = 0
-        for i_key in range(1,len(keys)):
-            if keys[i_key] not in other_category_ids:
-                secondary_count = category_to_count[keys[i_key]]
+            if 'image_filename' in det and \
+               det['image_filename'] == options.break_at_image:
+                verbose_debug_enabled = True
                 break
-
-
-
-
+
+    if verbose_debug_enabled:
+        _print_counts_with_names(category_to_count,classification_descriptions)
+        import pdb; pdb.set_trace()
+
+
+    ## Possibly change "other" classifications to the most common category
+
+    # ...if the dominant category is not an "other" category.
+
+    n_other_classifications_changed_this_image = 0
+
+    # If we have at least *min_detections_to_overwrite_other* in a category that isn't
+    # "other", change all "other" classifications to that category
+    if (max_count >= options.min_detections_to_overwrite_other) and \
+       (most_common_category not in other_category_ids):
 
-
-        # "other", change all "other" classifications to that category
-        if max_count >= options.min_detections_to_overwrite_other and \
-            most_common_category not in other_category_ids:
+        for det in detections:
 
-
+            if ('classifications' not in det) or \
+               (det['conf'] < options.detection_confidence_threshold):
+                continue
 
-
+            assert len(det['classifications']) == 1
+            c = det['classifications'][0]
 
-
-
+            if (c[1] >= options.classification_confidence_threshold) and \
+               (c[0] in other_category_ids):
 
-
+                n_other_classifications_changed_this_image += 1
+                c[0] = most_common_category
+
+            # ...if there are classifications for this detection
+
+        # ...for each detection
+
+    # ...if we should overwrite all "other" classifications
+
+
+    ## Re-count
+
+    category_to_count = _count_detections_by_category(detections, options)
+    # _print_counts_with_names(category_to_count,classification_descriptions)
+    keys = list(category_to_count.keys())
+    max_count = category_to_count[keys[0]]
+    most_common_category = keys[0]
+    del keys
+
+
+    ## Possibly change some non-dominant classifications to the dominant category
+
+    n_detections_flipped_this_image = 0
+
+    # Don't do this if the most common category is an "other" category, or
+    # if we don't have enough of the most common category
+    if (most_common_category not in other_category_ids) and \
+       (max_count >= options.min_detections_to_overwrite_secondary):
+
+        # i_det = 0; det = detections[i_det]
+        for i_det,det in enumerate(detections):
 
-
-
-
-                        n_other_classifications_changed += 1
-                        other_change_made = True
-                        c[0] = most_common_category
-
-                # ...for each classification
-
-            # ...if there are classifications for this detection
+            if ('classifications' not in det) or \
+               (det['conf'] < options.detection_confidence_threshold):
+                continue
 
-
+            assert len(det['classifications']) == 1
+            c = det['classifications'][0]
 
-
-
+            # Don't over-write the most common category with itself
+            if c[0] == most_common_category:
+                continue
+
+            # Don't bother with below-threshold classifications
+            if c[1] < options.classification_confidence_threshold:
+                continue
+
+            # If we have fewer of this category than the most common category,
+            # but not *too* many, flip it to the most common category.
+            if (max_count > category_to_count[c[0]]) and \
+               (category_to_count[c[0]] <= options.max_detections_nondominant_class):
+
+                c[0] = most_common_category
+                n_detections_flipped_this_image += 1
 
-        # ...
+        # ...for each detection
+
+    # ...if the dominant category is legit
 
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+    ## Re-count
+
+    category_to_count = _count_detections_by_category(detections, options)
+    # _print_counts_with_names(category_to_count,classification_descriptions)
+    keys = list(category_to_count.keys())
+    max_count = category_to_count[keys[0]]
+    most_common_category = keys[0]
+    del keys
+
+
+    ## Possibly collapse higher-level taxonomic predictions down to lower levels
+
+    # ...when the most common class is a child of a less common class.
+
+    n_taxonomic_changes_this_image = 0
+
+    process_taxonomic_rules = \
+        (classification_descriptions_clean is not None) and \
+        (len(classification_descriptions_clean) > 0) and \
+        (len(category_to_count) > 1)
+
+    if process_taxonomic_rules and options.propagate_classifications_through_taxonomy:
+
+        # det = detections[3]
         for det in detections:
 
-            if ('classifications' in det)
-                (det['conf']
-
-                for c in det['classifications']:
-                    if c[1] >= options.classification_overwrite_threshold and \
-                        c[0] != most_common_category:
+            if ('classifications' not in det) or \
+               (det['conf'] < options.detection_confidence_threshold):
+                continue
 
-
-
-
+            assert len(det['classifications']) == 1
+            c = det['classifications'][0]
+
+            # Don't bother with any classifications below the confidence threshold
+            if c[1] < options.classification_confidence_threshold:
+                continue
+
+            category_id_this_classification = c[0]
+            assert category_id_this_classification in category_to_count
+
+            category_description_this_classification = \
+                classification_descriptions_clean[category_id_this_classification]
+
+            # An empty description corresponds to the "animal" category. We don't handle
+            # "animal" here as a parent category, that would be handled in the "other smoothing"
+            # step above.
+            if len(category_description_this_classification) == 0:
+                continue
+
+            # We may have multiple child categories to choose from; this keeps track of
+            # the "best" we've seen so far. "Best" is based on the level (species is better
+            # than genus) and number.
+            child_category_to_score = defaultdict(float)
+
+            for category_id_of_candidate_child in category_to_count.keys():
+
+                # A category is never its own child
+                if category_id_of_candidate_child == category_id_this_classification:
+                    continue
 
-                        #
+                # Is this candidate a child of the current classification?
+                category_description_candidate_child = \
+                    classification_descriptions_clean[category_id_of_candidate_child]
 
-
+                # An empty description corresponds to "animal", which can never
+                # be a child of another category.
+                if len(category_description_candidate_child) == 0:
+                    continue
+
+                # As long as we're using "clean" descriptions, parent/child taxonomic
+                # relationships are defined by a substring relationship
+                is_child = category_description_this_classification in \
+                    category_description_candidate_child
+                if not is_child:
+                    continue
+
+                # How many instances of this child category are there?
+                child_category_count = category_to_count[category_id_of_candidate_child]
+
+                # What taxonomy level is this child category defined at?
+                child_category_level = taxonomy_level_index(
+                    classification_descriptions[category_id_of_candidate_child])
+
+                child_category_to_score[category_id_of_candidate_child] = \
+                    child_category_level * options.taxonomy_propagation_level_weight + \
+                    child_category_count * options.taxonomy_propagation_count_weight
+
+            # ...for each category we are considering reducing this classification to
+
+            # Did we find a category we want to change this classification to?
+            if len(child_category_to_score) > 0:
+
+                # Find the child category with the highest score
+                child_category_to_score = sort_dictionary_by_value(
+                    child_category_to_score,reverse=True)
+                best_child_category = next(iter(child_category_to_score.keys()))
+
+                if verbose_debug_enabled:
+                    old_category_name = \
+                        classification_descriptions_clean[c[0]]
+                    new_category_name = \
+                        classification_descriptions_clean[best_child_category]
+                    print('Replacing {} with {}'.format(
+                        old_category_name,new_category_name))
+
+                c[0] = best_child_category
+                n_taxonomic_changes_this_image += 1
 
         # ...for each detection
 
-
-            n_images_changed += 1
+    # ...if we have taxonomic information available
 
-    # ...for each image
 
-
-        n_detections_flipped,n_images_changed))
+    ## Re-count
 
-
-
-
-
-
-
-        json.dump(d,f,indent=1)
-
-    return d
-
-# ...def smooth_classification_results_image_level(...)
-
-
-#%% Sequence-level smoothing
-
-def _results_for_sequence(images_this_sequence,filename_to_results):
-    """
-    Fetch MD results for every image in this sequence, based on the 'file_name' field
-    """
+    category_to_count = _count_detections_by_category(detections, options)
+    # _print_counts_with_names(category_to_count,classification_descriptions)
+    keys = list(category_to_count.keys())
+    max_count = category_to_count[keys[0]]
+    most_common_category = keys[0]
+    del keys
 
-    results_this_sequence = []
-    for im in images_this_sequence:
-        fn = im['file_name']
-        results_this_image = filename_to_results[fn]
-        assert isinstance(results_this_image,dict)
-        results_this_sequence.append(results_this_image)
-
-    return results_this_sequence
-
 
-
-    """
-    Return all top-1 animal classifications for every detection in this
-    sequence, regardless of confidence
-
-    May modify [images_this_sequence] (removing non-top-1 classifications)
-    """
+    ## Possibly do within-family smoothing
 
-
-
-    #
-
-
-
+    n_within_family_smoothing_changes = 0
+
+    # min_detections_to_overwrite_secondary_same_family = -1
+    # max_detections_nondominant_class_same_family = 1
+    family_level = taxonomy_level_string_to_index('family')
+
+    if process_taxonomic_rules:
 
-
-
+        category_description_most_common_category = \
+            classification_descriptions[most_common_category]
+        most_common_category_taxonomic_level = \
+            taxonomy_level_index(category_description_most_common_category)
+        n_most_common_category = category_to_count[most_common_category]
+        tokens = category_description_most_common_category.split(';')
+        assert len(tokens) == 7
+        most_common_category_family = tokens[3]
+        most_common_category_genus = tokens[4]
+
+        # Only consider remapping to genus or species level, and only when we have
+        # a high enough count in the most common category
+        if process_taxonomic_rules and \
+           (options.min_detections_to_overwrite_secondary_same_family > 0) and \
+           (most_common_category not in other_category_ids) and \
+           (most_common_category_taxonomic_level > family_level) and \
+           (n_most_common_category >= options.min_detections_to_overwrite_secondary_same_family):
+
+            # det = detections[0]
+            for det in detections:
+
+                if ('classifications' not in det) or \
+                   (det['conf'] < options.detection_confidence_threshold):
+                    continue
+
+                assert len(det['classifications']) == 1
+                c = det['classifications'][0]
+
+                # Don't over-write the most common category with itself
+                if c[0] == most_common_category:
+                    continue
 
-
-
+                # Don't bother with below-threshold classifications
+                if c[1] < options.classification_confidence_threshold:
+                    continue
+
+                n_candidate_flip_category = category_to_count[c[0]]
+
+                # Do we have too many of the non-dominant category to do this kind of swap?
+                if n_candidate_flip_category > \
+                   options.max_detections_nondominant_class_same_family:
+                    continue
+
+                # Don't flip classes when it's a tie
+                if n_candidate_flip_category == n_most_common_category:
+                    continue
+
+                category_description_candidate_flip = \
+                    classification_descriptions[c[0]]
+                tokens = category_description_candidate_flip.split(';')
+                assert len(tokens) == 7
+                candidate_flip_category_family = tokens[3]
+                candidate_flip_category_genus = tokens[4]
+                candidate_flip_category_taxonomic_level = \
+                    taxonomy_level_index(category_description_candidate_flip)
 
-                # Only
-                if
+                # Only proceed if we have valid family strings
+                if (len(candidate_flip_category_family) == 0) or \
+                   (len(most_common_category_family) == 0):
                     continue
 
-                # Only
-                if
+                # Only proceed if the candidate and the most common category are in the same family
+                if candidate_flip_category_family != most_common_category_family:
                     continue
 
-                #
-                if
-
-
-
-
-
-
-            det['classifications'] = [det['classifications'][0]]
-
-        # Confidence values should be sorted within a detection; verify this, and ignore
-        top_classification = det['classifications'][0]
+                # Don't flip from a species to the genus level in the same genus
+                if (candidate_flip_category_genus == most_common_category_genus) and \
+                   (candidate_flip_category_taxonomic_level > \
+                    most_common_category_taxonomic_level):
+                    continue
+
+                old_category_name = classification_descriptions_clean[c[0]]
+                new_category_name = classification_descriptions_clean[most_common_category]
 
-
-
-
+                c[0] = most_common_category
+                n_within_family_smoothing_changes += 1
+
+            # ...for each detection
 
-    # ...
-
-
+        # ...if the dominant category is legit and we have taxonomic information available
+
+
+    return {'n_other_classifications_changed_this_image':n_other_classifications_changed_this_image,
+            'n_detections_flipped_this_image':n_detections_flipped_this_image,
+            'n_taxonomic_changes_this_image':n_taxonomic_changes_this_image,
+            'n_within_family_smoothing_changes':n_within_family_smoothing_changes}
 
-    # ...
+# ...def _smooth_classifications_for_list_of_detections(...)
 
 
-def
+def _smooth_single_image(im,
+                         options,
+                         other_category_ids,
+                         classification_descriptions,
+                         classification_descriptions_clean):
     """
-
-
+    Smooth classifications for a single image. Returns None if no changes are made,
+    else a dict.
+
+    classification_descriptions_clean should be semicolon-delimited taxonomic strings
+    from which common names and GUIDs have already been removed.
 
-
+    Assumes there is only one classification per detection, i.e. that non-top classifications
+    have already been removed.
     """
 
-
-
-    for c in classifications_this_sequence:
-        if c[1] >= options.classification_confidence_threshold:
-            category_to_count[c[0]] += 1
+    if 'detections' not in im or im['detections'] is None or len(im['detections']) == 0:
+        return
 
-
-    category_to_count = {k: v for k, v in sorted(category_to_count.items(),
-                                                 key=lambda item: item[1],
-                                                 reverse=True)}
+    detections = im['detections']
 
-
+    # Simplify debugging
+    for det in detections:
+        det['image_filename'] = im['file']
 
-
-
-
-
-
-
-
-
-
-
-
-def _sort_images_by_time(images):
+    to_return = _smooth_classifications_for_list_of_detections(detections,
+        options=options,
+        other_category_ids=other_category_ids,
+        classification_descriptions=classification_descriptions,
+        classification_descriptions_clean=classification_descriptions_clean)
+
+    # Clean out debug information
+    for det in detections:
+        del det['image_filename']
+
+    return to_return
+
+# ...def smooth_single_image
+
+
+#%% Image-level smoothing
+
+def smooth_classification_results_image_level(input_file,output_file=None,options=None):
     """
-
+    Smooth classifications at the image level for all results in the MD-formatted results
+    file [input_file], optionally writing a new set of results to [output_file].
+
+    This function generally expresses the notion that an image with 700 cows and one deer
+    is really just 701 cows.
+
+    Only count detections with a classification confidence threshold above
+    [options.classification_confidence_threshold], which in practice means we're only
+    looking at one category per detection.
+
+    If an image has at least [options.min_detections_to_overwrite_secondary] such detections
+    in the most common category, and no more than [options.max_detections_nondominant_class]
+    in the second-most-common category, flip all detections to the most common
+    category.
+
+    Optionally treat some classes as particularly unreliable, typically used to overwrite an
+    "other" class.
+
+    This function also removes everything but the top classification for each detection.
+
+    Args:
+        input_file (str): MegaDetector-formatted classification results file to smooth. Can
+            also be an already-loaded results dict.
+        output_file (str, optional): .json file to write smoothed results
+        options (ClassificationSmoothingOptions, optional): see
+            ClassificationSmoothingOptions for details.
+
+    Returns:
+        dict: MegaDetector-results-formatted dict, identical to what's written to
+            [output_file] if [output_file] is not None.
     """
-    return sorted(images, key = lambda im: im['datetime'])
 
+    ## Input validation
+
+    if options is None:
+        options = ClassificationSmoothingOptions()
+
+    r = _prepare_results_for_smoothing(input_file, options)
+    d = r['d']
+    other_category_ids = r['other_category_ids']
+    classification_descriptions_clean = r['classification_descriptions_clean']
+    classification_descriptions = r['classification_descriptions']
+
+
+    ## Smoothing
+
+    n_other_classifications_changed = 0
+    n_other_images_changed = 0
+    n_taxonomic_images_changed = 0
+
+    n_detections_flipped = 0
+    n_images_changed = 0
+    n_taxonomic_classification_changes = 0
+
+    # im = d['images'][0]
+    for im in tqdm(d['images']):
+
+        r = _smooth_single_image(im,
+                                 options,
+                                 other_category_ids,
+                                 classification_descriptions=classification_descriptions,
+                                 classification_descriptions_clean=classification_descriptions_clean)
+
+        if r is None:
+            continue
+
+        n_detections_flipped_this_image = r['n_detections_flipped_this_image']
+        n_other_classifications_changed_this_image = \
+            r['n_other_classifications_changed_this_image']
+        n_taxonomic_changes_this_image = r['n_taxonomic_changes_this_image']
+
+        n_detections_flipped += n_detections_flipped_this_image
+        n_other_classifications_changed += n_other_classifications_changed_this_image
+        n_taxonomic_classification_changes += n_taxonomic_changes_this_image
+
+        if n_detections_flipped_this_image > 0:
+            n_images_changed += 1
+        if n_other_classifications_changed_this_image > 0:
+            n_other_images_changed += 1
+        if n_taxonomic_changes_this_image > 0:
+            n_taxonomic_images_changed += 1
+
+    # ...for each image
+
+    print('Classification smoothing: changed {} detections on {} images'.format(
+        n_detections_flipped,n_images_changed))
+
+    print('"Other" smoothing: changed {} detections on {} images'.format(
+        n_other_classifications_changed,n_other_images_changed))
+
+    print('Taxonomic smoothing: changed {} detections on {} images'.format(
+        n_taxonomic_classification_changes,n_taxonomic_images_changed))
+
+
+    ## Write output
+
+    if output_file is not None:
+        print('Writing results after image-level smoothing to:\n{}'.format(output_file))
+        with open(output_file,'w') as f:
+            json.dump(d,f,indent=1)
 
-
-    if len(di) == 0:
-        return None
-    return next(iter(di.items()))[0]
-
+    return d
 
-def
-    if len(di) == 0:
-        return None
-    return next(iter(di.items()))[1]
+# ...def smooth_classification_results_image_level(...)
 
 
-
+#%% Sequence-level smoothing
+
+def smooth_classification_results_sequence_level(input_file,
                                                  cct_sequence_information,
                                                  output_file=None,
                                                  options=None):
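Note: two of the mechanisms added above are easy to illustrate. Parent/child taxonomic relationships are tested by substring containment on "clean" semicolon-delimited descriptions, and candidate children are ranked by a weighted sum of taxonomy level and count. A sketch with hypothetical, abbreviated descriptions (real descriptions have seven tokens) and hypothetical level indices:

    parent_description = 'mammalia;carnivora;canidae'                      # family level
    child_description = 'mammalia;carnivora;canidae;vulpes;vulpes vulpes'  # species level

    # The family string is a substring of the species string, so the family-level
    # classification can be propagated down to the species-level category.
    assert parent_description in child_description

    def child_score(level_index, count, level_weight=1.0, count_weight=0.01):
        # score = level * taxonomy_propagation_level_weight +
        #         count * taxonomy_propagation_count_weight
        return level_index * level_weight + count * count_weight

    # With the defaults (1.0, 0.01), the taxonomy level dominates and the count
    # mostly breaks ties: a species-level candidate (say, level 6) with 1 detection
    # beats a genus-level candidate (level 5) with 20 detections (6.01 > 5.20).
    assert child_score(6, 1) > child_score(5, 20)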
@@ -491,44 +824,33 @@ def smooth_classification_results_sequence_level(md_results,
     deer/deer/deer/elk/deer/deer/deer/deer is really just a deer.
 
     Args:
-
+        input_file (str or dict): MegaDetector-formatted classification results file to smooth
             (or already-loaded results). If you supply a dict, it's modified in place by default, but
             a copy can be forced by setting options.modify_in_place=False.
         cct_sequence_information (str, dict, or list): COCO Camera Traps file containing sequence IDs for
             each image (or an already-loaded CCT-formatted dict, or just the 'images' list from a CCT dict).
         output_file (str, optional): .json file to write smoothed results
-        options (
-
+        options (ClassificationSmoothingOptions, optional): see
+            ClassificationSmoothingOptions for details.
 
     Returns:
         dict: MegaDetector-results-formatted dict, identical to what's written to
             [output_file] if [output_file] is not None.
     """
 
-
-    options = ClassificationSmoothingOptionsSequenceLevel()
+    ## Input validation
 
-    if options
-        options
+    if options is None:
+        options = ClassificationSmoothingOptions()
 
-
-
+    r = _prepare_results_for_smoothing(input_file, options)
+    d = r['d']
+    other_category_ids = r['other_category_ids']
+    classification_descriptions_clean = r['classification_descriptions_clean']
+    classification_descriptions = r['classification_descriptions']
 
-    assert None in options.min_dominant_class_ratio_for_secondary_override_table, \
-        'Oops, it looks like you removed the default (None) key from ' + \
-        'options.min_dominant_class_ratio_for_secondary_override_table'
 
-
-    print('Loading MD results from {}'.format(md_results))
-    with open(md_results,'r') as f:
-        md_results = json.load(f)
-    else:
-        assert isinstance(md_results,dict)
-        if not options.modify_in_place:
-            print('Copying MD results instead of modifying in place')
-            md_results = copy.deepcopy(md_results)
-        else:
-            print('Smoothing MD results in place')
+    ## Make a list of images appearing in each sequence
 
     if isinstance(cct_sequence_information,list):
         image_info = cct_sequence_information
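Note: a minimal sketch of calling the rewritten sequence-level entry point, following the signature above; the paths are placeholders, and the CCT file is assumed to provide 'seq_id' and 'file_name' for each image, per the loop in the next hunk:

    from megadetector.postprocessing.classification_postprocessing import (
        ClassificationSmoothingOptions,
        smooth_classification_results_sequence_level)

    smoothed_results = smooth_classification_results_sequence_level(
        input_file='md_results.json',                  # placeholder path
        cct_sequence_information='cct_images.json',    # CCT file with sequence IDs
        output_file='md_results_seq_smoothed.json',    # placeholder path
        options=ClassificationSmoothingOptions())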
@@ -540,177 +862,119 @@ def smooth_classification_results_sequence_level(md_results,
|
|
|
540
862
|
else:
|
|
541
863
|
assert isinstance(cct_sequence_information,dict)
|
|
542
864
|
image_info = cct_sequence_information['images']
|
|
543
|
-
|
|
544
865
|
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
sequence_to_images = defaultdict(list)
|
|
866
|
+
sequence_to_image_filenames = defaultdict(list)
|
|
548
867
|
|
|
549
868
|
# im = image_info[0]
|
|
550
869
|
for im in tqdm(image_info):
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
filename_to_results[im['file'].replace('\\','/')] = im
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
##%% Smooth classification results over sequences (prep)
|
|
566
|
-
|
|
567
|
-
classification_category_id_to_name = md_results['classification_categories']
|
|
568
|
-
classification_category_name_to_id = {v: k for k, v in classification_category_id_to_name.items()}
|
|
569
|
-
|
|
570
|
-
class_names = list(classification_category_id_to_name.values())
|
|
571
|
-
|
|
572
|
-
assert(md_results['detection_categories'][options.animal_detection_category] == 'animal')
|
|
573
|
-
|
|
574
|
-
other_category_ids = set([classification_category_name_to_id[s] for s in options.other_category_names])
|
|
575
|
-
|
|
576
|
-
category_ids_to_smooth_to = set([classification_category_name_to_id[s] for s in options.category_names_to_smooth_to])
|
|
577
|
-
assert all([s in class_names for s in options.category_names_to_smooth_to])
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
##%% Smooth classifications at the sequence level (main loop)
|
|
581
|
-
|
|
582
|
-
n_other_flips = 0
|
|
583
|
-
n_classification_flips = 0
|
|
584
|
-
n_unclassified_flips = 0
|
|
870
|
+
sequence_to_image_filenames[im['seq_id']].append(im['file_name'])
|
|
871
|
+
del image_info
|
|
872
|
+
|
|
873
|
+
image_fn_to_classification_results = {}
|
|
874
|
+
for im in d['images']:
|
|
875
|
+
fn = im['file']
|
|
876
|
+
assert fn not in image_fn_to_classification_results
|
|
877
|
+
image_fn_to_classification_results[fn] = im
|
|
878
|
+
|
|
879
|
+
|
|
880
|
+
## Smoothing
|
|
@@ -585,37 +881,64 @@
 
-
-
+    n_other_classifications_changed = 0
+    n_other_sequences_changed = 0
+    n_taxonomic_sequences_changed = 0
+    n_within_family_sequences_changed = 0
 
-
-
+    n_detections_flipped = 0
+    n_sequences_changed = 0
+    n_taxonomic_classification_changes = 0
+    n_within_family_changes = 0
 
-
+    # sequence_id = list(sequence_to_image_filenames.keys())[0]
+    for sequence_id in sequence_to_image_filenames.keys():
+
+        image_filenames_this_sequence = sequence_to_image_filenames[sequence_id]
 
-        #
-
-
-
+        # if 'file' in image_filenames_this_sequence:
+        # import pdb; pdb.set_trace()
+
+        detections_this_sequence = []
+        for image_filename in image_filenames_this_sequence:
+            im = image_fn_to_classification_results[image_filename]
+            if 'detections' not in im or im['detections'] is None:
+                continue
+            detections_this_sequence.extend(im['detections'])
+
+            # Temporarily add image filenames to every detection,
+            # for debugging
+            for det in im['detections']:
+                det['image_filename'] = im['file']
 
-
-        for im in images_this_sequence:
-            if debug_fn is not None and debug_fn in im['file_name']:
-                raise ValueError('')
-
-        if len(classifications_this_sequence) == 0:
+        if len(detections_this_sequence) == 0:
             continue
 
-
-
-
-
-
+        r = _smooth_classifications_for_list_of_detections(
+            detections=detections_this_sequence,
+            options=options,
+            other_category_ids=other_category_ids,
+            classification_descriptions=classification_descriptions,
+            classification_descriptions_clean=classification_descriptions_clean)
+
+        if r is None:
             continue
 
-
-
-
-
-
-
-
+        n_detections_flipped_this_sequence = r['n_detections_flipped_this_image']
+        n_other_classifications_changed_this_sequence = \
+            r['n_other_classifications_changed_this_image']
+        n_taxonomic_changes_this_sequence = r['n_taxonomic_changes_this_image']
+        n_within_family_changes_this_sequence = r['n_within_family_smoothing_changes']
+
+        n_detections_flipped += n_detections_flipped_this_sequence
+        n_other_classifications_changed += n_other_classifications_changed_this_sequence
+        n_taxonomic_classification_changes += n_taxonomic_changes_this_sequence
+        n_within_family_changes += n_within_family_changes_this_sequence
+
+        if n_detections_flipped_this_sequence > 0:
+            n_sequences_changed += 1
+        if n_other_classifications_changed_this_sequence > 0:
+            n_other_sequences_changed += 1
+        if n_taxonomic_changes_this_sequence > 0:
+            n_taxonomic_sequences_changed += 1
+        if n_within_family_changes_this_sequence > 0:
+            n_within_family_sequences_changed += 1
 
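The rewritten loop pools every detection in a sequence and delegates the actual smoothing to _smooth_classifications_for_list_of_detections, only aggregating the counts it reports. Inferred from the four keys read above, the helper appears to return a dict shaped roughly like the stub below (or None when there is nothing to smooth); the exact shape is an assumption, and the '_this_image' suffix presumably reflects that the same helper also serves the image-level smoothing path.

# Hypothetical stub of the helper's return contract, inferred from the
# keys the loop reads; not the library's implementation
def _smooth_stub(detections):
    if len(detections) == 0:
        return None
    return {
        'n_detections_flipped_this_image': 0,
        'n_other_classifications_changed_this_image': 0,
        'n_taxonomic_changes_this_image': 0,
        'n_within_family_smoothing_changes': 0,
    }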
@@ -622,11 +945,5 @@
-
-        ## Smooth "other" classifications ##
-
-        if max_count >= options.min_dominant_class_classifications_above_threshold_for_other_smoothing:
-            for c in classifications_this_sequence:
-                if c[0] in other_category_ids:
-                    n_other_flips += 1
-                    c[0] = dominant_category_id
-                    c[1] = options.flipped_other_confidence_value
+    # ...for each sequence
 
+    print('Classification smoothing: changed {} detections in {} sequences'.format(
+        n_detections_flipped,n_sequences_changed))
 
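The deleted block above is the old inline form of "other" smoothing: once the dominant class has enough above-threshold classifications, classifications in generic "other" categories get flipped to the dominant class. Below is a self-contained toy version of that dominant-class idea, with simplified thresholds and field names (illustrative only; in 5.0.26 the real logic lives in _smooth_classifications_for_list_of_detections):

from collections import Counter

# Toy dominant-class smoothing over a pooled list of detections; each
# detection carries 'classifications': [[category_id, confidence], ...]
def toy_smooth_sequence(detections, conf_threshold=0.5,
                        min_dominant_count=3, flipped_confidence=0.6):
    counts = Counter()
    for det in detections:
        for c in det.get('classifications', []):
            if c[1] >= conf_threshold:
                counts[c[0]] += 1
    if not counts:
        return 0
    dominant_id, max_count = counts.most_common(1)[0]
    if max_count < min_dominant_count:
        return 0
    n_flips = 0
    for det in detections:
        for c in det.get('classifications', []):
            if c[0] != dominant_id:
                c[0] = dominant_id
                c[1] = flipped_confidence
                n_flips += 1
    return n_flips

# A sequence with three category-'1' calls and one category-'2' call
# collapses to category '1'
dets = [{'classifications': [['1', 0.9]]},
        {'classifications': [['1', 0.8]]},
        {'classifications': [['1', 0.7]]},
        {'classifications': [['2', 0.6]]}]
assert toy_smooth_sequence(dets) == 1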
@@ -633,84 +950,31 @@
-
-
-        # classifications we changed to be the dominant class. If we wanted to include those...
-        #
-        # sorted_category_to_count = count_above_threshold_classifications(classifications_this_sequence)
-        # max_count = get_first_value_from_sorted_dictionary(sorted_category_to_count)
-        # assert dominant_category_id == get_first_key_from_sorted_dictionary(sorted_category_to_count)
-
-
-        ## Smooth non-dominant classes ##
-
-        if max_count >= options.min_dominant_class_classifications_above_threshold_for_class_smoothing:
-
-            # Don't flip classes to the dominant class if they have a large number of classifications
-            category_ids_not_to_flip = set()
-
-            for category_id in sorted_category_to_count.keys():
-                secondary_class_count = sorted_category_to_count[category_id]
-                dominant_to_secondary_ratio = max_count / secondary_class_count
-
-                # Don't smooth over this class if there are a bunch of them, and the ratio
-                # if primary to secondary class count isn't too large
-
-                # Default ratio
-                ratio_for_override = options.min_dominant_class_ratio_for_secondary_override_table[None]
-
-                # Does this dominant class have a custom ratio?
-                dominant_category_name = classification_category_id_to_name[dominant_category_id]
-                if dominant_category_name in options.min_dominant_class_ratio_for_secondary_override_table:
-                    ratio_for_override = \
-                        options.min_dominant_class_ratio_for_secondary_override_table[dominant_category_name]
-
-                if (dominant_to_secondary_ratio < ratio_for_override) and \
-                    (secondary_class_count > \
-                     options.max_secondary_class_classifications_above_threshold_for_class_smoothing):
-                    category_ids_not_to_flip.add(category_id)
-
-            for c in classifications_this_sequence:
-                if c[0] not in category_ids_not_to_flip and c[0] != dominant_category_id:
-                    c[0] = dominant_category_id
-                    c[1] = options.flipped_class_confidence_value
-                    n_classification_flips += 1
-
-
-        ## Smooth unclassified detections ##
-
-        if max_count >= options.min_dominant_class_classifications_above_threshold_for_unclassified_smoothing:
-
-            results_this_sequence = _results_for_sequence(images_this_sequence,filename_to_results)
-            detections_this_sequence = []
-            for r in results_this_sequence:
-                if r['detections'] is not None:
-                    detections_this_sequence.extend(r['detections'])
-            for det in detections_this_sequence:
-                if 'classifications' in det and len(det['classifications']) > 0:
-                    continue
-                if det['category'] != options.animal_detection_category:
-                    continue
-                if det['conf'] < options.min_detection_confidence_for_unclassified_flipping:
-                    continue
-                det['classifications'] = [[dominant_category_id,options.flipped_unclassified_confidence_value]]
-                n_unclassified_flips += 1
-
-    # ...for each sequence
-
-    print('\Finished sequence smoothing\n')
-    print('Flipped {} "other" classifications'.format(n_other_flips))
-    print('Flipped {} species classifications'.format(n_classification_flips))
-    print('Flipped {} unclassified detections'.format(n_unclassified_flips))
-
+    print('"Other" smoothing: changed {} detections in {} sequences'.format(
+        n_other_classifications_changed,n_other_sequences_changed))
 
-
+    print('Taxonomic smoothing: changed {} detections in {} sequences'.format(
+        n_taxonomic_classification_changes,n_taxonomic_sequences_changed))
+
+    print('Within-family smoothing: changed {} detections in {} sequences'.format(
+        n_within_family_changes,n_within_family_sequences_changed))
 
-
-
+
+    ## Clean up debug information
+
+    for im in d['images']:
+        if 'detections' not in im or im['detections'] is None:
+            continue
+        for det in im['detections']:
+            if 'image_filename' in det:
+                del det['image_filename']
+
+
+    ## Write output
+
+    if output_file is not None:
         print('Writing sequence-smoothed classification results to {}'.format(
-            output_file))
-
+            output_file))
         with open(output_file,'w') as f:
-            json.dump(
+            json.dump(d,f,indent=1)
 
-    return
+    return d
 
 # ...smooth_classification_results_sequence_level(...)