megadetector 5.0.12__py3-none-any.whl → 5.0.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (45) hide show
  1. megadetector/api/batch_processing/api_core/server.py +1 -1
  2. megadetector/api/batch_processing/api_core/server_api_config.py +0 -1
  3. megadetector/api/batch_processing/api_core/server_job_status_table.py +0 -3
  4. megadetector/api/batch_processing/api_core/server_utils.py +0 -4
  5. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
  6. megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -3
  7. megadetector/classification/efficientnet/utils.py +0 -3
  8. megadetector/data_management/camtrap_dp_to_coco.py +0 -2
  9. megadetector/data_management/cct_json_utils.py +15 -6
  10. megadetector/data_management/coco_to_labelme.py +12 -1
  11. megadetector/data_management/databases/integrity_check_json_db.py +43 -27
  12. megadetector/data_management/importers/cacophony-thermal-importer.py +1 -4
  13. megadetector/data_management/ocr_tools.py +0 -4
  14. megadetector/data_management/read_exif.py +178 -44
  15. megadetector/data_management/rename_images.py +187 -0
  16. megadetector/data_management/wi_download_csv_to_coco.py +3 -2
  17. megadetector/data_management/yolo_output_to_md_output.py +7 -2
  18. megadetector/detection/process_video.py +548 -244
  19. megadetector/detection/pytorch_detector.py +33 -14
  20. megadetector/detection/run_detector.py +17 -5
  21. megadetector/detection/run_detector_batch.py +179 -65
  22. megadetector/detection/run_inference_with_yolov5_val.py +527 -357
  23. megadetector/detection/tf_detector.py +14 -3
  24. megadetector/detection/video_utils.py +284 -61
  25. megadetector/postprocessing/categorize_detections_by_size.py +16 -14
  26. megadetector/postprocessing/classification_postprocessing.py +716 -0
  27. megadetector/postprocessing/compare_batch_results.py +101 -93
  28. megadetector/postprocessing/convert_output_format.py +12 -5
  29. megadetector/postprocessing/merge_detections.py +18 -7
  30. megadetector/postprocessing/postprocess_batch_results.py +133 -127
  31. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +236 -232
  32. megadetector/postprocessing/subset_json_detector_output.py +66 -62
  33. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +0 -2
  34. megadetector/utils/ct_utils.py +5 -4
  35. megadetector/utils/md_tests.py +380 -128
  36. megadetector/utils/path_utils.py +39 -6
  37. megadetector/utils/process_utils.py +13 -4
  38. megadetector/visualization/visualization_utils.py +7 -2
  39. megadetector/visualization/visualize_db.py +79 -77
  40. megadetector/visualization/visualize_detector_output.py +0 -1
  41. {megadetector-5.0.12.dist-info → megadetector-5.0.14.dist-info}/LICENSE +0 -0
  42. {megadetector-5.0.12.dist-info → megadetector-5.0.14.dist-info}/METADATA +2 -2
  43. {megadetector-5.0.12.dist-info → megadetector-5.0.14.dist-info}/RECORD +45 -43
  44. {megadetector-5.0.12.dist-info → megadetector-5.0.14.dist-info}/top_level.txt +0 -0
  45. {megadetector-5.0.12.dist-info → megadetector-5.0.14.dist-info}/WHEEL +0 -0
@@ -0,0 +1,716 @@
1
+ """
2
+
3
+ classification_postprocessing.py
4
+
5
+ Functions for postprocessing species classification results, particularly:
6
+
7
+ * Smoothing results within a sequence (a sequence that looks like deer/deer/deer/elk/deer/deer
8
+ is really just a deer)
9
+ * Smoothing results within an image (an image with 700 cows and one deer is really just 701
10
+ cows)
11
+
12
+ """
13
+
14
+ #%% Constants and imports
15
+
16
+ import json
17
+ import copy
18
+
19
+ from collections import defaultdict
20
+ from tqdm import tqdm
21
+
22
+ from megadetector.utils.ct_utils import is_list_sorted
23
+
24
+
25
+ #%% Options classes
26
+
27
class ClassificationSmoothingOptionsImageLevel:
    """
    Tunable parameters for smooth_classification_results_image_level().
    """

    def __init__(self):

        #: Minimum number of counted detections the most common category needs before
        #: it is treated as the dominant category for an image
        self.min_detections_above_threshold = 4

        #: If the most common non-dominant category has at least this many counted
        #: detections in an image, the image is left alone, even when a dominant
        #: category exists
        self.max_detections_secondary_class = 3

        #: Minimum number of counted detections the dominant category needs before
        #: "other" classifications in the image are overwritten with it
        self.min_detections_to_overwrite_other = 2

        #: Category names treated as "other"; may be an empty list, but must not be None
        self.other_category_names = ['other']

        #: Classification confidence a classification must reach to be counted when
        #: determining the dominant category
        self.classification_confidence_threshold = 0.6

        #: Classifications below this confidence are never overwritten
        self.classification_overwrite_threshold = 0.3

        #: Detection confidence a detection must reach to be counted when determining
        #: the dominant category
        self.detection_confidence_threshold = 0.2

        #: Detections below this confidence never have their classifications overwritten
        self.detection_overwrite_threshold = 0.05
59
+
60
+
61
class ClassificationSmoothingOptionsSequenceLevel:
    """
    Tunable parameters for smooth_classification_results_sequence_level().
    """

    def __init__(self):

        #: Detection category ID to operate on; detections in any other category are skipped
        self.animal_detection_category = '1'

        #: Category names treated as "other"; these classifications can be flipped to
        #: a common category
        self.other_category_names = {'other'}

        #: The only category names that sequences will be smoothed *to*.  A sequence whose
        #: dominant category is not on this list is left untouched.
        #:
        #: Example:
        #:
        #: ['deer','elk','cow','canid','cat','bird','bear']
        self.category_names_to_smooth_to = None

        #: The dominant class must be seen at least this many times (above threshold) before
        #: other species classifications are switched to it
        self.min_dominant_class_classifications_above_threshold_for_class_smoothing = 5

        #: A secondary class seen more than this many times (above threshold) is not switched
        #: to the dominant class (unless the ratio test below overrides this)
        self.max_secondary_class_classifications_above_threshold_for_class_smoothing = 5

        #: When the dominant-to-secondary count ratio is at least the value in this table,
        #: the secondary class is switched regardless of how many times it was seen (i.e.,
        #: max_secondary_class_classifications_above_threshold_for_class_smoothing is ignored).
        #:
        #: Keys are dominant class names, so different dominant classes can use different
        #: ratios (e.g. lots of cows really tend to be cows; less so for canids).
        #:
        #: Must always contain a None key, which supplies the default ratio.
        #:
        #: Example:
        #:
        #: {'cow':2,None:3}
        self.min_dominant_class_ratio_for_secondary_override_table = {None:3}

        #: If the dominant class has at least this many above-threshold classifications in a
        #: sequence, every "other" classification in that sequence is converted to the
        #: dominant class, regardless of confidence
        self.min_dominant_class_classifications_above_threshold_for_other_smoothing = 3

        #: If the dominant class has at least this many above-threshold classifications in a
        #: sequence, previously-unclassified detections in that sequence are assigned the
        #: dominant class
        self.min_dominant_class_classifications_above_threshold_for_unclassified_smoothing = 3

        #: Classifications below this confidence are not counted when determining the
        #: dominant class or when deciding whether to switch other classifications
        self.classification_confidence_threshold = 0.6

        #: Confidence assigned to a classification that was flipped from an "other" class
        #: (the original confidence value is irrelevant at that point)
        self.flipped_other_confidence_value = 0.6

        #: Confidence assigned to a classification that was flipped from a non-"other" class
        #: (the original confidence value is irrelevant at that point)
        self.flipped_class_confidence_value = 0.6

        #: Confidence assigned to the classification created for a previously-unclassified
        #: detection
        self.flipped_unclassified_confidence_value = 0.6

        #: Unclassified detections are only assigned a class if their detection confidence
        #: is at least this value
        self.min_detection_confidence_for_unclassified_flipping = 0.15

        #: Only relevant when MegaDetector results are supplied as a dict rather than a
        #: filename; determines whether smoothing happens in place
        self.modify_in_place = True

# ...class ClassificationSmoothingOptionsSequenceLevel()
138
+
139
+
140
+ #%% Image-level smoothing
141
+
142
def smooth_classification_results_image_level(input_file,output_file=None,options=None):
    """
    Apply image-level classification smoothing to the MD-formatted results file
    [input_file], optionally writing the smoothed results to [output_file].

    The idea: an image with 700 cows and one deer is really just 701 cows.

    Processing happens in two passes over the images:

    1. Every detection is reduced to its single top (first) classification.
    2. For each image, classifications are counted per category, considering only
       detections with confidence >= [options.detection_confidence_threshold] and
       classifications with confidence >= [options.classification_confidence_threshold].
       Images with fewer than two counted categories are left alone.  Otherwise:

       * If the most common category is not an "other" category and was counted at
         least [options.min_detections_to_overwrite_other] times, eligible "other"
         classifications are overwritten with it.
       * If the most common category was counted at least
         [options.min_detections_above_threshold] times, and the most common
         non-"other" runner-up was counted fewer than
         [options.max_detections_secondary_class] times, all eligible classifications
         are overwritten with the most common category.

    A classification is "eligible" for overwriting when its detection confidence is
    >= [options.detection_overwrite_threshold] and its classification confidence is
    >= [options.classification_overwrite_threshold].  Only the category is changed;
    the confidence value is left as it was.

    Args:
        input_file (str): MegaDetector-formatted classification results file to smooth
        output_file (str, optional): .json file to write smoothed results
        options (ClassificationSmoothingOptionsImageLevel, optional): see
            ClassificationSmoothingOptionsImageLevel for details.

    Returns:
        dict: MegaDetector-results-formatted dict, identical to what's written to
        [output_file] if [output_file] is not None.
    """

    if options is None:
        options = ClassificationSmoothingOptionsImageLevel()

    with open(input_file,'r') as f:
        print('Loading results from:\n{}'.format(input_file))
        d = json.load(f)

    # Invert the ID --> name table so the "other" names can be resolved to IDs
    category_name_to_id = {}
    for category_id in d['classification_categories']:
        category_name_to_id[d['classification_categories'][category_id]] = category_id

    other_category_ids = []
    for other_name in options.other_category_names:
        if other_name not in category_name_to_id:
            print('Warning: "other" category {} not present in file {}'.format(
                other_name,input_file))
            continue
        other_category_ids.append(category_name_to_id[other_name])

    def _image_has_detections(image_record):
        # True if this image has a non-null, non-empty detection list
        detection_list = image_record.get('detections',None)
        return (detection_list is not None) and (len(detection_list) > 0)

    def _overwritable_classifications(detection_list):
        # Lazily yield every classification entry that is eligible for overwriting;
        # callers modify the yielded entries in place.
        for detection in detection_list:
            if 'classifications' not in detection:
                continue
            if not (detection['conf'] >= options.detection_overwrite_threshold):
                continue
            for entry in detection['classifications']:
                if entry[1] >= options.classification_overwrite_threshold:
                    yield entry

    n_other_classifications_changed = 0
    n_other_images_changed = 0

    n_detections_flipped = 0
    n_images_changed = 0

    ## Pass 1: keep only the top classification for each detection

    for im in tqdm(d['images']):

        if not _image_has_detections(im):
            continue

        for det in im['detections']:

            if 'classifications' not in det or len(det['classifications']) == 0:
                continue

            # Classifications are expected to arrive sorted by confidence (descending)
            confidences = [c[1] for c in det['classifications']]
            assert is_list_sorted(confidences,reverse=True)
            det['classifications'] = det['classifications'][:1]

        # ...for each detection in this image

    # ...for each image

    ## Pass 2: find the dominant category in each image and overwrite where appropriate

    for im in tqdm(d['images']):

        if not _image_has_detections(im):
            continue

        detections = im['detections']

        # Count confident classifications on confident detections
        counts = defaultdict(int)
        for det in detections:
            if 'classifications' not in det:
                continue
            if not (det['conf'] >= options.detection_confidence_threshold):
                continue
            for c in det['classifications']:
                if c[1] >= options.classification_confidence_threshold:
                    counts[c[0]] += 1

        # With zero or one counted categories, there is nothing to smooth
        if len(counts) <= 1:
            continue

        # Category IDs, most common first (ties keep first-seen order)
        ranked = sorted(counts,key=counts.get,reverse=True)

        # Quirky special case: if "other" is the most common category but is tied with
        # a non-"other" runner-up, prefer the runner-up
        other_leads = (ranked[0] in other_category_ids)
        runner_up_is_real = (ranked[1] not in other_category_ids)
        if other_leads and runner_up_is_real and (counts[ranked[0]] == counts[ranked[1]]):
            ranked[0], ranked[1] = ranked[1], ranked[0]

        most_common_category = ranked[0]
        max_count = counts[most_common_category]

        # The 'secondary count' belongs to the most common non-"other" runner-up
        secondary_count = next(
            (counts[k] for k in ranked[1:] if k not in other_category_ids),0)

        assert max_count >= secondary_count

        # If a non-"other" category is common enough, it absorbs eligible "other"
        # classifications
        if (most_common_category not in other_category_ids) and \
           (max_count >= options.min_detections_to_overwrite_other):

            n_changed_before = n_other_classifications_changed

            for c in _overwritable_classifications(detections):
                if c[0] in other_category_ids:
                    c[0] = most_common_category
                    n_other_classifications_changed += 1

            if n_other_classifications_changed > n_changed_before:
                n_other_images_changed += 1

        # ...if we should overwrite all "other" classifications

        # Is there a sufficiently dominant category?
        if max_count < options.min_detections_above_threshold:
            continue

        if secondary_count >= options.max_detections_secondary_class:
            continue

        # We have a dominant category; flip every other eligible classification to it.
        # The dominant category may be "other" here.
        n_detections_flipped_this_image = 0

        for c in _overwritable_classifications(detections):
            if c[0] != most_common_category:
                c[0] = most_common_category
                n_detections_flipped_this_image += 1

        n_detections_flipped += n_detections_flipped_this_image

        if n_detections_flipped_this_image > 0:
            n_images_changed += 1

    # ...for each image

    print('Classification smoothing: changed {} detections on {} images'.format(
        n_detections_flipped,n_images_changed))

    print('"Other" smoothing: changed {} detections on {} images'.format(
        n_other_classifications_changed,n_other_images_changed))

    if output_file is not None:
        print('Writing results after image-level smoothing to:\n{}'.format(output_file))
        with open(output_file,'w') as f:
            json.dump(d,f,indent=1)

    return d

# ...def smooth_classification_results_image_level(...)
349
+
350
+
351
+ #%% Sequence-level smoothing
352
+
353
def _results_for_sequence(images_this_sequence,filename_to_results):
    """
    Look up the MD results entry for each image in a sequence.

    Images are matched to results via their 'file_name' field, which is used as a
    key into [filename_to_results].  The returned list is in the same order as
    [images_this_sequence] and holds the original result dicts (not copies).
    """

    matched_results = []

    for image_record in images_this_sequence:
        result_entry = filename_to_results[image_record['file_name']]
        assert isinstance(result_entry,dict)
        matched_results.append(result_entry)

    return matched_results
366
+
367
+
368
def _top_classifications_for_sequence(images_this_sequence,filename_to_results,options):
    """
    Return the top-1 classification for every classified animal detection in this
    sequence, regardless of confidence.

    The returned entries are the same list objects stored in the results, so modifying
    them modifies the results in place.

    May modify the results in [filename_to_results] (removing non-top-1 classifications).

    Images whose 'detections' field is missing or None are skipped, as are detections
    outside [options.animal_detection_category] and detections whose 'classifications'
    field is missing, None, or empty.

    Args:
        images_this_sequence (list): image dicts, each with a 'file_name' field
        filename_to_results (dict): maps filenames to MD results dicts for each image
        options: object providing 'animal_detection_category'

    Returns:
        list: top-1 classifications, each a [category_id,confidence] list
    """

    classifications_this_sequence = []

    # im = images_this_sequence[0]
    for im in images_this_sequence:

        fn = im['file_name']
        results_this_image = filename_to_results[fn]

        # Images without a usable detection list have nothing to contribute
        detections = results_this_image.get('detections',None)
        if detections is None:
            continue

        # det = detections[0]
        for det in detections:

            # Only process animal detections
            if det['category'] != options.animal_detection_category:
                continue

            # Only process detections with classification information; an empty
            # list carries no information, and indexing it would raise an error.
            classifications = det.get('classifications',None)
            if not classifications:
                continue

            # We only care about top-1 classifications, remove everything else
            if len(classifications) > 1:

                # Make sure the list of classifications is already sorted by confidence
                classification_confidence_values = [c[1] for c in classifications]
                assert is_list_sorted(classification_confidence_values,reverse=True)

                # ...and just keep the first one
                det['classifications'] = [classifications[0]]

            classifications_this_sequence.append(det['classifications'][0])

        # ...for each detection in this image

    # ...for each image in this sequence

    return classifications_this_sequence

# ..._top_classifications_for_sequence()
420
+
421
+
422
def _count_above_threshold_classifications(classifications_this_sequence,options,other_category_ids=None):
    """
    Given a list of classification objects ([category_id,confidence] pairs), return a
    dict mapping category IDs to the count of classifications whose confidence is at
    least [options.classification_confidence_threshold].

    This dict's keys will be sorted in descending order by frequency; categories with
    equal counts keep the order in which they were first seen.  One exception: if the
    most common category is an "other" category and is tied with a non-"other"
    runner-up, the two are swapped so the non-"other" category comes first.

    Args:
        classifications_this_sequence (list): [category_id,confidence] pairs
        options: object providing 'classification_confidence_threshold' and
            'other_category_names'
        other_category_ids (collection, optional): category IDs to treat as "other"
            in the tie-break.  Counts are keyed by category ID, so this is what the
            tie-break needs.  If None, the tie-break falls back to comparing category
            IDs against [options.other_category_names] (the historical behavior),
            which only matches when IDs and names happen to coincide.

    Returns:
        dict: category ID --> count, in the order described above
    """

    # Count above-threshold classifications in this sequence
    category_to_count = defaultdict(int)
    for c in classifications_this_sequence:
        if c[1] >= options.classification_confidence_threshold:
            category_to_count[c[0]] += 1

    # Category IDs in descending order by count (the sort is stable)
    keys_sorted_by_frequency = sorted(category_to_count,
                                      key=category_to_count.get,
                                      reverse=True)

    # Decide what counts as "other" for the tie-break
    if other_category_ids is not None:
        other_keys = other_category_ids
    else:
        other_keys = options.other_category_names

    # Handle a quirky special case: if the most common category is "other" and
    # it's "tied" with the second-most-common category, swap them.
    if (other_keys is not None) and (len(other_keys) > 0) and \
       (len(keys_sorted_by_frequency) > 1):

        first_key = keys_sorted_by_frequency[0]
        second_key = keys_sorted_by_frequency[1]

        if (first_key in other_keys) and \
           (second_key not in other_keys) and \
           (category_to_count[first_key] == category_to_count[second_key]):
            keys_sorted_by_frequency[0] = second_key
            keys_sorted_by_frequency[1] = first_key

    # Return a plain dict whose insertion order reflects the final ordering
    return {k: category_to_count[k] for k in keys_sorted_by_frequency}

# ...def _count_above_threshold_classifications()
461
+
462
+
463
def _sort_images_by_time(images):
    """
    Return a new list holding the elements of [images] in ascending order of
    their 'datetime' field; [images] itself is not modified.
    """
    chronological = list(images)
    chronological.sort(key=lambda image_record: image_record['datetime'])
    return chronological
468
+
469
+
470
def _get_first_key_from_sorted_dictionary(di):
    """
    Return the first key of [di] in iteration order, or None if [di] is empty.
    """
    for first_key in di:
        return first_key
    return None
474
+
475
+
476
def _get_first_value_from_sorted_dictionary(di):
    """
    Return the value stored under the first key of [di] in iteration order, or
    None if [di] is empty.
    """
    for first_value in di.values():
        return first_value
    return None
480
+
481
+
482
def smooth_classification_results_sequence_level(md_results,
                                                 cct_sequence_information,
                                                 output_file=None,
                                                 options=None):
    """
    Smooth classifications at the sequence level for all results in the MD-formatted
    results [md_results], optionally writing a new set of results to [output_file].

    This function generally expresses the notion that a sequence that looks like
    deer/deer/deer/elk/deer/deer/deer/deer is really just a deer.

    For each sequence, the dominant category is the one with the most top-1
    classifications at or above [options.classification_confidence_threshold].  If an
    "other" category is tied for first place with a non-"other" category, the
    non-"other" category wins.  Sequences whose dominant category is not listed in
    [options.category_names_to_smooth_to] are left untouched; note that this list
    defaults to None (treated as empty), in which case no sequence is modified.

    For the remaining sequences, and subject to the count thresholds in [options]:

    * "other" classifications are flipped to the dominant category
    * classifications in other categories are flipped to the dominant category,
      unless that category is both frequent and not heavily outnumbered
    * unclassified animal detections are assigned the dominant category

    Images are matched between the two inputs by filename: the 'file' field of each
    MD result (with backslashes converted to forward slashes) must equal the
    'file_name' field of the corresponding sequence-information image.

    Args:
        md_results (str or dict): MegaDetector-formatted classification results file to smooth
            (or already-loaded results).  If you supply a dict, it's modified in place by default,
            but a copy can be forced by setting options.modify_in_place=False.
        cct_sequence_information (str, dict, or list): COCO Camera Traps file containing sequence
            IDs for each image (or an already-loaded CCT-formatted dict, or just the 'images' list
            from a CCT dict).
        output_file (str, optional): .json file to write smoothed results
        options (ClassificationSmoothingOptionsSequenceLevel, optional): see
            ClassificationSmoothingOptionsSequenceLevel for details.  This object is not
            modified.

    Returns:
        dict: MegaDetector-results-formatted dict, identical to what's written to
        [output_file] if [output_file] is not None.
    """

    if options is None:
        options = ClassificationSmoothingOptionsSequenceLevel()

    # Resolve None-valued lists into local variables, rather than writing defaults
    # back into the caller's options object
    category_names_to_smooth_to = options.category_names_to_smooth_to
    if category_names_to_smooth_to is None:
        category_names_to_smooth_to = []

    other_category_names = options.other_category_names
    if other_category_names is None:
        other_category_names = []

    assert None in options.min_dominant_class_ratio_for_secondary_override_table, \
        'Oops, it looks like you removed the default (None) key from ' + \
        'options.min_dominant_class_ratio_for_secondary_override_table'

    if isinstance(md_results,str):
        print('Loading MD results from {}'.format(md_results))
        with open(md_results,'r') as f:
            md_results = json.load(f)
    else:
        assert isinstance(md_results,dict)
        if not options.modify_in_place:
            print('Copying MD results instead of modifying in place')
            md_results = copy.deepcopy(md_results)
        else:
            print('Smoothing MD results in place')

    if isinstance(cct_sequence_information,list):
        image_info = cct_sequence_information
    elif isinstance(cct_sequence_information,str):
        print('Loading sequence information from {}'.format(cct_sequence_information))
        with open(cct_sequence_information,'r') as f:
            cct_sequence_information = json.load(f)
        image_info = cct_sequence_information['images']
    else:
        assert isinstance(cct_sequence_information,dict)
        image_info = cct_sequence_information['images']


    ##%% Make a list of images appearing in each sequence

    sequence_to_images = defaultdict(list)

    # im = image_info[0]
    for im in tqdm(image_info):
        sequence_to_images[im['seq_id']].append(im)

    all_sequences = sorted(sequence_to_images.keys())


    ##%% Load classification results

    # Map each filename to classification results for that file
    filename_to_results = {}

    for im in tqdm(md_results['images']):
        filename_to_results[im['file'].replace('\\','/')] = im


    ##%% Smooth classification results over sequences (prep)

    classification_category_id_to_name = md_results['classification_categories']
    classification_category_name_to_id = \
        {v: k for k, v in classification_category_id_to_name.items()}

    assert(md_results['detection_categories'][options.animal_detection_category] == 'animal')

    # "Other" categories that don't appear in this results file are skipped with a
    # warning, consistent with image-level smoothing
    other_category_ids = set()
    for s in other_category_names:
        if s in classification_category_name_to_id:
            other_category_ids.add(classification_category_name_to_id[s])
        else:
            print('Warning: "other" category {} not present in MD results'.format(s))

    # Validate the target category names *before* looking them up, so a bad name
    # produces this assertion rather than a bare KeyError
    unknown_target_names = \
        [s for s in category_names_to_smooth_to if s not in classification_category_name_to_id]
    assert len(unknown_target_names) == 0, \
        'Categories to smooth to are not present in MD results: {}'.format(
            str(unknown_target_names))

    category_ids_to_smooth_to = \
        set([classification_category_name_to_id[s] for s in category_names_to_smooth_to])


    ##%% Smooth classifications at the sequence level (main loop)

    n_other_flips = 0
    n_classification_flips = 0
    n_unclassified_flips = 0

    # Break if this token is contained in a filename (set to None for normal operation)
    debug_fn = None

    # i_sequence = 0; seq_id = all_sequences[i_sequence]
    for i_sequence,seq_id in tqdm(enumerate(all_sequences),total=len(all_sequences)):

        images_this_sequence = sequence_to_images[seq_id]

        # Collect top-1 classifications in this sequence (regardless of confidence).  These
        # are references into md_results, so editing them below edits the results.
        classifications_this_sequence = _top_classifications_for_sequence(images_this_sequence,
                                                                          filename_to_results,
                                                                          options)

        # Handy debugging code for looking at the numbers for a particular sequence
        if debug_fn is not None:
            for im in images_this_sequence:
                if debug_fn in im['file_name']:
                    raise ValueError('')

        if len(classifications_this_sequence) == 0:
            continue

        # Count above-threshold classifications for each category
        sorted_category_to_count = _count_above_threshold_classifications(
            classifications_this_sequence,options)

        if len(sorted_category_to_count) == 0:
            continue

        # Category IDs, most common first
        ranked_category_ids = list(sorted_category_to_count.keys())

        # If an "other" category is tied for first place with a non-"other" category,
        # prefer the non-"other" category.  This is done here, using category IDs,
        # because the counts are keyed by ID rather than by name.
        if (len(ranked_category_ids) > 1) and \
           (ranked_category_ids[0] in other_category_ids) and \
           (ranked_category_ids[1] not in other_category_ids) and \
           (sorted_category_to_count[ranked_category_ids[0]] == \
            sorted_category_to_count[ranked_category_ids[1]]):
            ranked_category_ids[0], ranked_category_ids[1] = \
                ranked_category_ids[1], ranked_category_ids[0]

        dominant_category_id = ranked_category_ids[0]
        max_count = sorted_category_to_count[dominant_category_id]

        # If our dominant category ID isn't something we want to smooth to,
        # don't mess around with this sequence
        if dominant_category_id not in category_ids_to_smooth_to:
            continue


        ## Smooth "other" classifications ##

        if max_count >= options.min_dominant_class_classifications_above_threshold_for_other_smoothing:
            for c in classifications_this_sequence:
                if c[0] in other_category_ids:
                    n_other_flips += 1
                    c[0] = dominant_category_id
                    c[1] = options.flipped_other_confidence_value

        # By not re-computing "max_count" here, we are making a decision that the count used
        # to decide whether a class should overwrite another class does not include any "other"
        # classifications we changed to be the dominant class.


        ## Smooth non-dominant classes ##

        if max_count >= options.min_dominant_class_classifications_above_threshold_for_class_smoothing:

            # The override ratio depends only on the dominant class: use its custom
            # ratio if it has one, otherwise the default (None) ratio
            ratio_table = options.min_dominant_class_ratio_for_secondary_override_table
            dominant_category_name = classification_category_id_to_name[dominant_category_id]
            ratio_for_override = ratio_table.get(dominant_category_name,ratio_table[None])

            # Don't flip classes to the dominant class if they have a large number of
            # classifications and the dominant class doesn't heavily outnumber them
            category_ids_not_to_flip = set()

            for category_id in sorted_category_to_count.keys():

                # Counts in this table are always >= 1, so the division is safe
                secondary_class_count = sorted_category_to_count[category_id]
                dominant_to_secondary_ratio = max_count / secondary_class_count

                if (dominant_to_secondary_ratio < ratio_for_override) and \
                   (secondary_class_count > \
                    options.max_secondary_class_classifications_above_threshold_for_class_smoothing):
                    category_ids_not_to_flip.add(category_id)

            for c in classifications_this_sequence:
                if c[0] not in category_ids_not_to_flip and c[0] != dominant_category_id:
                    c[0] = dominant_category_id
                    c[1] = options.flipped_class_confidence_value
                    n_classification_flips += 1


        ## Smooth unclassified detections ##

        if max_count >= options.min_dominant_class_classifications_above_threshold_for_unclassified_smoothing:

            results_this_sequence = _results_for_sequence(images_this_sequence,filename_to_results)
            detections_this_sequence = []
            for r in results_this_sequence:
                if r['detections'] is not None:
                    detections_this_sequence.extend(r['detections'])
            for det in detections_this_sequence:
                if 'classifications' in det and len(det['classifications']) > 0:
                    continue
                if det['category'] != options.animal_detection_category:
                    continue
                if det['conf'] < options.min_detection_confidence_for_unclassified_flipping:
                    continue
                det['classifications'] = \
                    [[dominant_category_id,options.flipped_unclassified_confidence_value]]
                n_unclassified_flips += 1

    # ...for each sequence

    print('\nFinished sequence smoothing\n')
    print('Flipped {} "other" classifications'.format(n_other_flips))
    print('Flipped {} species classifications'.format(n_classification_flips))
    print('Flipped {} unclassified detections'.format(n_unclassified_flips))


    ##%% Write smoothed classification results

    if output_file is not None:

        print('Writing sequence-smoothed classification results to {}'.format(
            output_file))

        with open(output_file,'w') as f:
            json.dump(md_results,f,indent=1)

    return md_results

# ...smooth_classification_results_sequence_level(...)