megadetector 5.0.21__py3-none-any.whl → 5.0.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (37) hide show
  1. megadetector/data_management/cct_json_utils.py +143 -7
  2. megadetector/data_management/cct_to_md.py +12 -5
  3. megadetector/data_management/databases/integrity_check_json_db.py +83 -77
  4. megadetector/data_management/importers/raic_csv_to_md_results.py +416 -0
  5. megadetector/data_management/importers/zamba_results_to_md_results.py +1 -2
  6. megadetector/data_management/lila/create_lila_test_set.py +25 -11
  7. megadetector/data_management/lila/download_lila_subset.py +9 -2
  8. megadetector/data_management/lila/generate_lila_per_image_labels.py +3 -2
  9. megadetector/data_management/lila/test_lila_metadata_urls.py +5 -1
  10. megadetector/data_management/read_exif.py +10 -14
  11. megadetector/data_management/rename_images.py +1 -1
  12. megadetector/detection/process_video.py +14 -3
  13. megadetector/detection/pytorch_detector.py +15 -3
  14. megadetector/detection/run_detector.py +4 -3
  15. megadetector/detection/run_detector_batch.py +2 -2
  16. megadetector/detection/run_inference_with_yolov5_val.py +121 -13
  17. megadetector/detection/video_utils.py +21 -10
  18. megadetector/postprocessing/classification_postprocessing.py +1 -1
  19. megadetector/postprocessing/compare_batch_results.py +931 -142
  20. megadetector/postprocessing/detector_calibration.py +243 -45
  21. megadetector/postprocessing/md_to_coco.py +85 -20
  22. megadetector/postprocessing/postprocess_batch_results.py +0 -1
  23. megadetector/postprocessing/validate_batch_results.py +65 -15
  24. megadetector/taxonomy_mapping/map_new_lila_datasets.py +15 -12
  25. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +1 -1
  26. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +3 -1
  27. megadetector/utils/ct_utils.py +71 -14
  28. megadetector/utils/md_tests.py +9 -1
  29. megadetector/utils/path_utils.py +14 -7
  30. megadetector/utils/process_utils.py +9 -3
  31. megadetector/utils/write_html_image_list.py +5 -1
  32. megadetector/visualization/visualization_utils.py +211 -87
  33. {megadetector-5.0.21.dist-info → megadetector-5.0.23.dist-info}/METADATA +19 -18
  34. {megadetector-5.0.21.dist-info → megadetector-5.0.23.dist-info}/RECORD +37 -36
  35. {megadetector-5.0.21.dist-info → megadetector-5.0.23.dist-info}/WHEEL +1 -1
  36. {megadetector-5.0.21.dist-info → megadetector-5.0.23.dist-info}/LICENSE +0 -0
  37. {megadetector-5.0.21.dist-info → megadetector-5.0.23.dist-info}/top_level.txt +0 -0
@@ -8,11 +8,24 @@ Compare sets of batch results; typically used to compare:
8
8
  * Results before/after RDE
9
9
  * Results with/without augmentation
10
10
 
11
- Makes pairwise comparisons, but can take lists of results files (will perform
12
- all pairwise comparisons). Results are written to an HTML page that shows the number
13
- and nature of disagreements (in the sense of each image being a detection or non-detection),
11
+ Makes pairwise comparisons between sets of results, but can take lists of results files
12
+ (will perform all pairwise comparisons). Results are written to an HTML page that shows the
13
+ number and nature of disagreements (in the sense of each image being a detection or non-detection),
14
14
  with sample images for each category.
15
15
 
16
+ Operates in one of three modes, depending on whether ground truth labels/boxes are available:
17
+
18
+ * The most common mode assumes no ground truth, just finds agreement/disagreement between
19
+ results files, or class discrepancies.
20
+
21
+ * If image-level ground truth is available, finds image-level agreements on TPs/TNs/FPs/FNs, but also
22
+ finds image-level TPs/TNs/FPs/FNs that are unique to each set of results (at the specified confidence
23
+ threshold).
24
+
25
+ * If box-level ground truth is available, finds box-level agreements on TPs/TNs/FPs/FNs, but also finds
26
+ image-level TPs/TNs/FPs/FNs that are unique to each set of results (at the specified confidence
27
+ threshold).
28
+
16
29
  """
17
30
 
18
31
  #%% Imports
@@ -24,19 +37,36 @@ import copy
24
37
  import urllib
25
38
  import itertools
26
39
 
40
+ import numpy as np
41
+
27
42
  from tqdm import tqdm
28
43
  from functools import partial
44
+ from collections import defaultdict
45
+
46
+ from PIL import ImageFont, ImageDraw
29
47
 
30
48
  from multiprocessing.pool import ThreadPool
31
49
  from multiprocessing.pool import Pool
32
50
 
33
51
  from megadetector.visualization import visualization_utils
34
52
  from megadetector.utils.write_html_image_list import write_html_image_list
53
+ from megadetector.utils.ct_utils import invert_dictionary, get_iou
35
54
  from megadetector.utils import path_utils
55
+ from megadetector.visualization.visualization_utils import get_text_size
36
56
 
37
-
38
- #%% Constants and support classes
57
+ def _maxempty(L):
58
+ """
59
+ Return the maximum value in a list, or 0 if the list is empty
60
+ """
39
61
 
62
+ if len(L) == 0:
63
+ return 0
64
+ else:
65
+ return max(L)
66
+
67
+
68
+ #%% Constants and support classes
69
+
40
70
  class PairwiseBatchComparisonOptions:
41
71
  """
42
72
  Defines the options used for a single pairwise comparison; a list of these
@@ -67,7 +97,7 @@ class PairwiseBatchComparisonOptions:
67
97
  self.rendering_confidence_threshold_a = 0.1
68
98
 
69
99
  #: Rendering threshold to use for all categories for filename B
70
- self.rendering_confidence_threshold_b = 0.1
100
+ self.rendering_confidence_threshold_b = 0.1
71
101
 
72
102
  # ...class PairwiseBatchComparisonOptions
73
103
 
@@ -128,9 +158,45 @@ class BatchComparisonOptions:
128
158
  #: a warning.
129
159
  self.error_on_non_matching_lists = True
130
160
 
161
+ #: Ground truth .json file in COCO Camera Traps format, or an already-loaded COCO dictionary
162
+ self.ground_truth_file = None
163
+
164
+ #: IoU threshold to use when comparing to ground truth with boxes
165
+ self.gt_iou_threshold = 0.5
166
+
167
+ #: Category names that refer to empty images when image-level ground truth is provided
168
+ self.gt_empty_categories = ['empty','blank','misfire']
169
+
170
+ #: Should we show image-level labels as text on each image when boxes are not available?
171
+ self.show_labels_for_image_level_gt = True
172
+
173
+ #: Should we show category names (instead of numbers) on GT boxes?
174
+ self.show_category_names_on_gt_boxes = True
175
+
176
+ #: Should we show category names (instead of numbers) on detected boxes?
177
+ self.show_category_names_on_detected_boxes = True
178
+
131
179
  #: List of PairwiseBatchComparisonOptions that defines the comparisons we'll render.
132
180
  self.pairwise_options = []
133
181
 
182
+ #: Only process images whose file names contain this token
183
+ #:
184
+ #: This can also be a pointer to a function that takes a string (filename)
185
+ #: and returns a bool (if the function returns True, the image will be
186
+ #: included in the comparison).
187
+ self.required_token = None
188
+
189
+ #: Enable additional debug output
190
+ self.verbose = False
191
+
192
+ #: Separate out the "clean TP" and "clean TN" categories, only relevant when GT is
193
+ #: available.
194
+ self.include_clean_categories = True
195
+
196
+ #: When rendering to the output table, optionally write alternative strings
197
+ #: to describe images
198
+ self.fn_to_display_fn = None
199
+
134
200
  # ...class BatchComparisonOptions
135
201
 
136
202
 
@@ -147,7 +213,8 @@ class PairwiseBatchComparisonResults:
147
213
  #: Possibly-modified version of the PairwiseBatchComparisonOptions supplied as input.
148
214
  self.pairwise_options = None
149
215
 
150
- #: A dictionary with keys including:
216
+ #: A dictionary with keys representing category names; in the no-ground-truth case, for example,
217
+ #: category names are:
151
218
  #:
152
219
  #: common_detections
153
220
  #: common_non_detections
@@ -155,7 +222,7 @@ class PairwiseBatchComparisonResults:
155
222
  #: detections_b_only
156
223
  #: class_transitions
157
224
  #
158
- #: Each of these maps a filename to a two-element list (the image in set A, the image in set B).
225
+ #: Values are dicts with fields 'im_a', 'im_b', 'sort_conf', and 'im_gt'
159
226
  self.categories_to_image_pairs = None
160
227
 
161
228
  # ...class PairwiseBatchComparisonResults
@@ -212,8 +279,8 @@ def _render_image_pair(fn,image_pairs,category_folder,options,pairwise_options):
212
279
 
213
280
  im = visualization_utils.open_image(input_image_path)
214
281
  image_pair = image_pairs[fn]
215
- detections_a = image_pair[0]['detections']
216
- detections_b = image_pair[1]['detections']
282
+ detections_a = image_pair['im_a']['detections']
283
+ detections_b = image_pair['im_b']['detections']
217
284
 
218
285
  custom_strings_a = [''] * len(detections_a)
219
286
  custom_strings_b = [''] * len(detections_b)
@@ -234,19 +301,91 @@ def _render_image_pair(fn,image_pairs,category_folder,options,pairwise_options):
234
301
  if options.target_width is not None:
235
302
  im = visualization_utils.resize_image(im, options.target_width)
236
303
 
304
+ label_map = None
305
+ if options.show_category_names_on_detected_boxes:
306
+ label_map=options.detection_category_id_to_name
307
+
237
308
  visualization_utils.render_detection_bounding_boxes(detections_a,im,
238
309
  confidence_threshold=pairwise_options.rendering_confidence_threshold_a,
239
310
  thickness=4,expansion=0,
311
+ label_map=label_map,
240
312
  colormap=options.colormap_a,
241
313
  textalign=visualization_utils.TEXTALIGN_LEFT,
314
+ vtextalign=visualization_utils.VTEXTALIGN_TOP,
242
315
  custom_strings=custom_strings_a)
243
316
  visualization_utils.render_detection_bounding_boxes(detections_b,im,
244
317
  confidence_threshold=pairwise_options.rendering_confidence_threshold_b,
245
318
  thickness=2,expansion=0,
319
+ label_map=label_map,
246
320
  colormap=options.colormap_b,
247
- textalign=visualization_utils.TEXTALIGN_RIGHT,
321
+ textalign=visualization_utils.TEXTALIGN_LEFT,
322
+ vtextalign=visualization_utils.VTEXTALIGN_BOTTOM,
248
323
  custom_strings=custom_strings_b)
249
324
 
325
+ # Do we also need to render ground truth?
326
+ if 'im_gt' in image_pair and image_pair['im_gt'] is not None:
327
+
328
+ im_gt = image_pair['im_gt']
329
+ annotations_gt = image_pair['annotations_gt']
330
+ gt_boxes = []
331
+ for ann in annotations_gt:
332
+ if 'bbox' in ann:
333
+ gt_boxes.append(ann['bbox'])
334
+ gt_categories = [ann['category_id'] for ann in annotations_gt]
335
+
336
+ if len(gt_boxes) > 0:
337
+
338
+ label_map = None
339
+ if options.show_category_names_on_gt_boxes:
340
+ label_map=options.gt_category_id_to_name
341
+
342
+ assert len(gt_boxes) == len(gt_categories)
343
+ gt_colormap = ['yellow']*(max(gt_categories)+1)
344
+ visualization_utils.render_db_bounding_boxes(boxes=gt_boxes,
345
+ classes=gt_categories,
346
+ image=im,
347
+ original_size=(im_gt['width'],im_gt['height']),
348
+ label_map=label_map,
349
+ thickness=1,
350
+ expansion=0,
351
+ textalign=visualization_utils.TEXTALIGN_RIGHT,
352
+ vtextalign=visualization_utils.VTEXTALIGN_TOP,
353
+ text_rotation=-90,
354
+ colormap=gt_colormap)
355
+
356
+ else:
357
+
358
+ if options.show_labels_for_image_level_gt:
359
+
360
+ gt_categories_set = set([ann['category_id'] for ann in annotations_gt])
361
+ gt_category_names = [options.gt_category_id_to_name[category_name] for
362
+ category_name in gt_categories_set]
363
+ category_string = ','.join(gt_category_names)
364
+ category_string = '(' + category_string + ')'
365
+
366
+ try:
367
+ font = ImageFont.truetype('arial.ttf', 25)
368
+ except IOError:
369
+ font = ImageFont.load_default()
370
+
371
+ draw = ImageDraw.Draw(im)
372
+
373
+ text_width, text_height = get_text_size(font,category_string)
374
+
375
+ text_left = 10
376
+ text_bottom = text_height + 10
377
+ margin = np.ceil(0.05 * text_height)
378
+
379
+ draw.text(
380
+ (text_left + margin, text_bottom - text_height - margin),
381
+ category_string,
382
+ fill='white',
383
+ font=font)
384
+
385
+ # ...if we have boxes in the GT
386
+
387
+ # ...if we need to render ground truth
388
+
250
389
  output_image_fn = path_utils.flatten_path(fn)
251
390
  output_image_path = os.path.join(category_folder,output_image_fn)
252
391
  im.save(output_image_path)
@@ -255,6 +394,174 @@ def _render_image_pair(fn,image_pairs,category_folder,options,pairwise_options):
255
394
  # ...def _render_image_pair()
256
395
 
257
396
 
397
+ def _result_types_to_comparison_category(result_types_present_a,
398
+ result_types_present_b,
399
+ ground_truth_type,
400
+ options):
401
+ """
402
+ Given the set of result types (tp,tn,fp,fn) present in each of two sets of results
403
+ for an image, determine the category to which we want to assign this image.
404
+ """
405
+
406
+ # The "common_tp" category is for the case where both models have *only* TPs
407
+ if ('tp' in result_types_present_a) and ('tp' in result_types_present_b) and \
408
+ (len(result_types_present_a) == 1) and (len(result_types_present_b) == 1):
409
+ return 'common_tp'
410
+
411
+ # The "common_tn" category is for the case where both models have *only* TNs
412
+ if ('tn' in result_types_present_a) and ('tn' in result_types_present_b) and \
413
+ (len(result_types_present_a) == 1) and (len(result_types_present_b) == 1):
414
+ return 'common_tn'
415
+
416
+ """
417
+ # The "common_fp" category is for the case where both models have *only* FPs
418
+ if ('fp' in result_types_present_a) and ('fp' in result_types_present_b) and \
419
+ (len(result_types_present_a) == 1) and (len(result_types_present_b) == 1):
420
+ return 'common_fp'
421
+ """
422
+
423
+ # The "common_fp" category is for the case where both models have at least one FP,
424
+ # and no FNs.
425
+ if ('fp' in result_types_present_a) and ('fp' in result_types_present_b) and \
426
+ ('fn' not in result_types_present_a) and ('fn' not in result_types_present_b):
427
+ return 'common_fp'
428
+
429
+ """
430
+ # The "common_fn" category is for the case where both models have *only* FNs
431
+ if ('fn' in result_types_present_a) and ('fn' in result_types_present_b) and \
432
+ (len(result_types_present_a) == 1) and (len(result_types_present_b) == 1):
433
+ return 'common_fn'
434
+ """
435
+
436
+ # The "common_fn" category is for the case where both models have at least one FN,
437
+ # and no FPs
438
+ if ('fn' in result_types_present_a) and ('fn' in result_types_present_b) and \
439
+ ('fp' not in result_types_present_a) and ('fp' not in result_types_present_b):
440
+ return 'common_fn'
441
+
442
+ ## The tp-only categories are for the case where one model has *only* TPs
443
+
444
+ if ('tp' in result_types_present_a) and (len(result_types_present_a) == 1):
445
+ # Clean TPs are cases where the other model has only FNs, no FPs
446
+ if options.include_clean_categories:
447
+ if ('fn' in result_types_present_b) and \
448
+ ('fp' not in result_types_present_b) and \
449
+ ('tp' not in result_types_present_b):
450
+ return 'clean_tp_a_only'
451
+ # Otherwise, TPs are cases where one model has only TPs, and the other model
452
+ # has any mistakse
453
+ if ('fn' in result_types_present_b) or ('fp' in result_types_present_b):
454
+ return 'tp_a_only'
455
+
456
+ if ('tp' in result_types_present_b) and (len(result_types_present_b) == 1):
457
+ # Clean TPs are cases where the other model has only FNs, no FPs
458
+ if options.include_clean_categories:
459
+ if ('fn' in result_types_present_a) and \
460
+ ('fp' not in result_types_present_a) and \
461
+ ('tp' not in result_types_present_a):
462
+ return 'clean_tp_b_only'
463
+ # Otherwise, TPs are cases where one model has only TPs, and the other model
464
+ # has any mistakse
465
+ if ('fn' in result_types_present_a) or ('fp' in result_types_present_a):
466
+ return 'tp_b_only'
467
+
468
+ # The tn-only categories are for the case where one model has a TN and the
469
+ # other has at least one fp
470
+ if 'tn' in result_types_present_a and 'fp' in result_types_present_b:
471
+ assert len(result_types_present_a) == 1
472
+ assert len(result_types_present_b) == 1
473
+ return 'tn_a_only'
474
+ if 'tn' in result_types_present_b and 'fp' in result_types_present_a:
475
+ assert len(result_types_present_a) == 1
476
+ assert len(result_types_present_b) == 1
477
+ return 'tn_b_only'
478
+
479
+ # The 'fpfn' category is for everything else
480
+ return 'fpfn'
481
+
482
+ # ...def _result_types_to_comparison_category(...)
483
+
484
+
485
+ def _subset_md_results(results,options):
486
+ """
487
+ Subset a set of MegaDetector results according to the rules defined in the
488
+ BatchComparisonOptions object [options]. Typically used to filter for files
489
+ containing a particular string. Modifies [results] in place, also returns.
490
+
491
+ Args:
492
+ results (dict): MD results
493
+ options (BatchComparisonOptions): job options containing filtering rules
494
+ """
495
+
496
+ if options.required_token is None:
497
+ return results
498
+
499
+ images_to_keep = []
500
+ for im in results['images']:
501
+ # Is [required_token] a string?
502
+ if isinstance(options.required_token,str):
503
+ if options.required_token in im['file']:
504
+ images_to_keep.append(im)
505
+ # Otherwise [required_token] is a function
506
+ else:
507
+ assert callable(options.required_token), 'Illegal value for required_token'
508
+ if options.required_token(im['file']):
509
+ images_to_keep.append(im)
510
+
511
+
512
+ if options.verbose:
513
+ print('Keeping {} of {} images in MD results'.format(
514
+ len(images_to_keep),len(results['images'])))
515
+
516
+ results['images'] = images_to_keep
517
+ return results
518
+
519
+ # ...def _subset_md_results(...)
520
+
521
+
522
+ def _subset_ground_truth(gt_data,options):
523
+ """
524
+ Subset a set of COCO annotations according to the rules defined in the
525
+ BatchComparisonOptions object [options]. Typically used to filter for files
526
+ containing a particular string. Modifies [results] in place, also returns.
527
+
528
+ Args:
529
+ gt_data (dict): COCO-formatted annotations
530
+ options (BatchComparisonOptions): job options containing filtering rules
531
+ """
532
+
533
+ if options.required_token is None:
534
+ return gt_data
535
+
536
+ images_to_keep = []
537
+ for im in gt_data['images']:
538
+ if isinstance(options.required_token,str):
539
+ if options.required_token in im['file_name']:
540
+ images_to_keep.append(im)
541
+ else:
542
+ if options.required_token(im['file_name']):
543
+ images_to_keep.append(im)
544
+
545
+ image_ids_to_keep_set = set([im['id'] for im in images_to_keep])
546
+
547
+ annotations_to_keep = []
548
+ for ann in gt_data['annotations']:
549
+ if ann['image_id'] in image_ids_to_keep_set:
550
+ annotations_to_keep.append(ann)
551
+
552
+ if options.verbose:
553
+ print('Keeping {} of {} images, {} of {} annotations in GT data'.format(
554
+ len(images_to_keep),len(gt_data['images']),
555
+ len(annotations_to_keep),len(gt_data['annotations'])))
556
+
557
+ gt_data['images'] = images_to_keep
558
+ gt_data['annotations'] = annotations_to_keep
559
+
560
+ return gt_data
561
+
562
+ # ...def _subset_ground_truth(...)
563
+
564
+
258
565
  def _pairwise_compare_batch_results(options,output_index,pairwise_options):
259
566
  """
260
567
  The main entry point for this module is compare_batch_results(), which calls
@@ -297,7 +604,7 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
297
604
  assert os.path.isfile(pairwise_options.results_filename_b), \
298
605
  "Can't find results file {}".format(pairwise_options.results_filename_b)
299
606
  assert os.path.isdir(options.image_folder), \
300
- "Can't find image folder {}".format(pairwise_options.image_folder)
607
+ "Can't find image folder {}".format(options.image_folder)
301
608
  os.makedirs(options.output_folder,exist_ok=True)
302
609
 
303
610
 
@@ -323,6 +630,9 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
323
630
 
324
631
  detection_categories_a = results_a['detection_categories']
325
632
  detection_categories_b = results_b['detection_categories']
633
+ detection_category_id_to_name = detection_categories_a
634
+ detection_category_name_to_id = invert_dictionary(detection_categories_a)
635
+ options.detection_category_id_to_name = detection_category_id_to_name
326
636
 
327
637
  if pairwise_options.results_description_a is None:
328
638
  if 'detector' not in results_a['info']:
@@ -338,9 +648,13 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
338
648
  else:
339
649
  pairwise_options.results_description_b = results_b['info']['detector']
340
650
 
651
+ # Restrict this comparison to specific files if requested
652
+ results_a = _subset_md_results(results_a, options)
653
+ results_b = _subset_md_results(results_b, options)
654
+
341
655
  images_a = results_a['images']
342
656
  images_b = results_b['images']
343
-
657
+
344
658
  filename_to_image_a = {im['file']:im for im in images_a}
345
659
  filename_to_image_b = {im['file']:im for im in images_b}
346
660
 
@@ -369,20 +683,218 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
369
683
  else:
370
684
  filenames_to_compare = options.filenames_to_include
371
685
 
686
+
687
+ ##%% Determine whether ground truth is available
688
+
689
+ # ...and determine what type of GT is available, boxes or image-level labels
690
+
691
+ gt_data = None
692
+ gt_category_id_to_detection_category_id = None
693
+
694
+ if options.ground_truth_file is None:
695
+
696
+ ground_truth_type = 'no_gt'
697
+
698
+ else:
699
+
700
+ # Read ground truth data if necessary
701
+ if isinstance(options.ground_truth_file,dict):
702
+ gt_data = options.ground_truth_file
703
+ else:
704
+ assert isinstance(options.ground_truth_file,str)
705
+ with open(options.ground_truth_file,'r') as f:
706
+ gt_data = json.load(f)
707
+
708
+ # Restrict this comparison to specific files if requested
709
+ gt_data = _subset_ground_truth(gt_data, options)
710
+
711
+ # Do we have box-level ground truth or image-level ground truth?
712
+ found_box = False
713
+
714
+ for ann in gt_data['annotations']:
715
+ if 'bbox' in ann:
716
+ found_box = True
717
+ break
718
+
719
+ if found_box:
720
+ ground_truth_type = 'bbox_gt'
721
+ else:
722
+ ground_truth_type = 'image_level_gt'
723
+
724
+ gt_category_name_to_id = {c['name']:c['id'] for c in gt_data['categories']}
725
+ gt_category_id_to_name = invert_dictionary(gt_category_name_to_id)
726
+ options.gt_category_id_to_name = gt_category_id_to_name
727
+
728
+ if ground_truth_type == 'bbox_gt':
729
+
730
+ if not options.class_agnostic_comparison:
731
+ assert set(gt_category_name_to_id.keys()) == set(detection_category_name_to_id.keys()), \
732
+ 'Cannot compare detections to GT with different categories when class_agnostic_comparison is False'
733
+ gt_category_id_to_detection_category_id = {}
734
+ for category_name in gt_category_name_to_id:
735
+ gt_category_id = gt_category_name_to_id[category_name]
736
+ detection_category_id = detection_category_name_to_id[category_name]
737
+ gt_category_id_to_detection_category_id[gt_category_id] = detection_category_id
738
+
739
+ elif ground_truth_type == 'image_level_gt':
740
+
741
+ if not options.class_agnostic_comparison:
742
+ for detection_category_name in detection_category_name_to_id:
743
+ if detection_category_name not in gt_category_name_to_id:
744
+ raise ValueError('Detection category {} not available in GT category list'.format(
745
+ detection_category_name))
746
+ for gt_category_name in gt_category_name_to_id:
747
+ if gt_category_name in options.gt_empty_categories:
748
+ continue
749
+ if (gt_category_name not in detection_category_name_to_id):
750
+ raise ValueError('GT category {} not available in detection category list'.format(
751
+ gt_category_name))
752
+
753
+ assert ground_truth_type in ('no_gt','bbox_gt','image_level_gt')
754
+
755
+ # Make sure ground truth data refers to at least *some* of the same files that are in our
756
+ # results files
757
+ if gt_data is not None:
758
+
759
+ filenames_to_compare_set = set(filenames_to_compare)
760
+ gt_filenames = [im['file_name'] for im in gt_data['images']]
761
+ gt_filenames_set = set(gt_filenames)
762
+
763
+ common_filenames = filenames_to_compare_set.intersection(gt_filenames_set)
764
+ assert len(common_filenames) > 0, 'MD results files and ground truth file have no images in common'
765
+
766
+ filenames_only_in_gt = gt_filenames_set.difference(filenames_to_compare_set)
767
+ if len(filenames_only_in_gt) > 0:
768
+ print('Warning: {} files are only available in the ground truth (not in MD results)'.format(
769
+ len(filenames_only_in_gt)))
770
+
771
+ filenames_only_in_results = gt_filenames_set.difference(gt_filenames)
772
+ if len(filenames_only_in_results) > 0:
773
+ print('Warning: {} files are only available in the MD results (not in ground truth)'.format(
774
+ len(filenames_only_in_results)))
775
+
776
+ if options.error_on_non_matching_lists:
777
+ if len(filenames_only_in_gt) > 0 or len(filenames_only_in_results) > 0:
778
+ raise ValueError('GT image set is not identical to result image sets')
779
+
780
+ filenames_to_compare = sorted(list(common_filenames))
781
+
782
+ # Map filenames to ground truth images and annotations
783
+ filename_to_image_gt = {im['file_name']:im for im in gt_data['images']}
784
+ gt_image_id_to_image = {}
785
+ for im in gt_data['images']:
786
+ gt_image_id_to_image[im['id']] = im
787
+ gt_image_id_to_annotations = defaultdict(list)
788
+ for ann in gt_data['annotations']:
789
+ gt_image_id_to_annotations[ann['image_id']].append(ann)
790
+
791
+ # Convert annotations to relative (MD) coordinates
792
+
793
+ # ann = gt_data['annotations'][0]
794
+ for ann in gt_data['annotations']:
795
+ gt_image = gt_image_id_to_image[ann['image_id']]
796
+ if 'bbox' not in ann:
797
+ continue
798
+ # COCO format: [x,y,width,height]
799
+ # normalized format: [x_min, y_min, width_of_box, height_of_box]
800
+ normalized_bbox = [ann['bbox'][0]/gt_image['width'],ann['bbox'][1]/gt_image['height'],
801
+ ann['bbox'][2]/gt_image['width'],ann['bbox'][3]/gt_image['height']]
802
+ ann['normalized_bbox'] = normalized_bbox
803
+
804
+
372
805
  ##%% Find differences
373
806
 
374
- # Each of these maps a filename to a two-element list (the image in set A, the image in set B)
375
- #
376
- # Right now, we only handle a very simple notion of class transition, where the detection
377
- # of maximum confidence changes class *and* both images have an above-threshold detection.
378
- common_detections = {}
379
- common_non_detections = {}
380
- detections_a_only = {}
381
- detections_b_only = {}
382
- class_transitions = {}
807
+ # See PairwiseBatchComparisonResults for a description
808
+ categories_to_image_pairs = {}
809
+
810
+ # This will map category names that can be used in filenames (e.g. "common_non_detections" or
811
+ # "false_positives_a_only") to friendly names (e.g. "Common non-detections")
812
+ categories_to_page_titles = None
813
+
814
+ if ground_truth_type == 'no_gt':
815
+
816
+ categories_to_image_pairs['common_detections'] = {}
817
+ categories_to_image_pairs['common_non_detections'] = {}
818
+ categories_to_image_pairs['detections_a_only'] = {}
819
+ categories_to_image_pairs['detections_b_only'] = {}
820
+ categories_to_image_pairs['class_transitions'] = {}
821
+
822
+ categories_to_page_titles = {
823
+ 'common_detections':'Detections common to both models',
824
+ 'common_non_detections':'Non-detections common to both models',
825
+ 'detections_a_only':'Detections reported by model A only',
826
+ 'detections_b_only':'Detections reported by model B only',
827
+ 'class_transitions':'Detections reported as different classes by models A and B'
828
+ }
383
829
 
830
+
831
+ elif (ground_truth_type == 'bbox_gt') or (ground_truth_type == 'image_level_gt'):
832
+
833
+ categories_to_image_pairs['common_tp'] = {}
834
+ categories_to_image_pairs['common_tn'] = {}
835
+ categories_to_image_pairs['common_fp'] = {}
836
+ categories_to_image_pairs['common_fn'] = {}
837
+
838
+ categories_to_image_pairs['tp_a_only'] = {}
839
+ categories_to_image_pairs['tp_b_only'] = {}
840
+ categories_to_image_pairs['tn_a_only'] = {}
841
+ categories_to_image_pairs['tn_b_only'] = {}
842
+
843
+ categories_to_image_pairs['fpfn'] = {}
844
+
845
+ categories_to_page_titles = {
846
+ 'common_tp':'Common true positives',
847
+ 'common_tn':'Common true negatives',
848
+ 'common_fp':'Common false positives',
849
+ 'common_fn':'Common false negatives',
850
+ 'tp_a_only':'TP (A only)',
851
+ 'tp_b_only':'TP (B only)',
852
+ 'tn_a_only':'TN (A only)',
853
+ 'tn_b_only':'TN (B only)',
854
+ 'fpfn':'More complicated discrepancies'
855
+ }
856
+
857
+ if options.include_clean_categories:
858
+
859
+ categories_to_image_pairs['clean_tp_a_only'] = {}
860
+ categories_to_image_pairs['clean_tp_b_only'] = {}
861
+ # categories_to_image_pairs['clean_tn_a_only'] = {}
862
+ # categories_to_image_pairs['clean_tn_b_only'] = {}
863
+
864
+ categories_to_page_titles['clean_tp_a_only'] = 'Clean TP wins for A'
865
+ categories_to_page_titles['clean_tp_b_only'] = 'Clean TP wins for B'
866
+ # categories_to_page_titles['clean_tn_a_only'] = 'Clean TN wins for A'
867
+ # categories_to_page_titles['clean_tn_b_only'] = 'Clean TN wins for B'
868
+
869
+
870
+ else:
871
+
872
+ raise Exception('Unknown ground truth type: {}'.format(ground_truth_type))
873
+
874
+ # Map category IDs to thresholds
875
+ category_id_to_threshold_a = {}
876
+ category_id_to_threshold_b = {}
877
+
878
+ for category_id in detection_categories_a:
879
+ category_name = detection_categories_a[category_id]
880
+ if category_name in pairwise_options.detection_thresholds_a:
881
+ category_id_to_threshold_a[category_id] = \
882
+ pairwise_options.detection_thresholds_a[category_name]
883
+ else:
884
+ category_id_to_threshold_a[category_id] = \
885
+ pairwise_options.detection_thresholds_a['default']
886
+
887
+ for category_id in detection_categories_b:
888
+ category_name = detection_categories_b[category_id]
889
+ if category_name in pairwise_options.detection_thresholds_b:
890
+ category_id_to_threshold_b[category_id] = \
891
+ pairwise_options.detection_thresholds_b[category_name]
892
+ else:
893
+ category_id_to_threshold_b[category_id] = \
894
+ pairwise_options.detection_thresholds_b['default']
895
+
384
896
  # fn = filenames_to_compare[0]
385
- for fn in tqdm(filenames_to_compare):
897
+ for i_file,fn in tqdm(enumerate(filenames_to_compare),total=len(filenames_to_compare)):
386
898
 
387
899
  if fn not in filename_to_image_b:
388
900
 
@@ -395,91 +907,352 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
395
907
  im_a = filename_to_image_a[fn]
396
908
  im_b = filename_to_image_b[fn]
397
909
 
398
- categories_above_threshold_a = set()
399
-
400
- if not 'detections' in im_a or im_a['detections'] is None:
401
- assert 'failure' in im_a and im_a['failure'] is not None
402
- continue
910
+ im_pair = {}
911
+ im_pair['im_a'] = im_a
912
+ im_pair['im_b'] = im_b
913
+ im_pair['im_gt'] = None
914
+ im_pair['annotations_gt'] = None
403
915
 
404
- if not 'detections' in im_b or im_b['detections'] is None:
405
- assert 'failure' in im_b and im_b['failure'] is not None
406
- continue
916
+ if gt_data is not None:
917
+
918
+ if fn not in filename_to_image_gt:
407
919
 
408
- invalid_category_error = False
409
-
410
- # det = im_a['detections'][0]
411
- for det in im_a['detections']:
920
+ # We shouldn't have gotten this far if error_on_non_matching_lists is set
921
+ assert not options.error_on_non_matching_lists
922
+
923
+ print('Skipping filename {}, not in ground truth'.format(fn))
924
+ continue
412
925
 
413
- category_id = det['category']
926
+ im_gt = filename_to_image_gt[fn]
927
+ annotations_gt = gt_image_id_to_annotations[im_gt['id']]
928
+ im_pair['im_gt'] = im_gt
929
+ im_pair['annotations_gt'] = annotations_gt
414
930
 
415
- if category_id not in detection_categories_a:
416
- print('Warning: unexpected category {} for model A on file {}'.format(category_id,fn))
417
- invalid_category_error = True
418
- break
931
+ comparison_category = None
932
+
933
+ # Compare image A to image B, without ground truth
934
+ if ground_truth_type == 'no_gt':
935
+
936
+ categories_above_threshold_a = set()
937
+
938
+ if not 'detections' in im_a or im_a['detections'] is None:
939
+ assert 'failure' in im_a and im_a['failure'] is not None
940
+ continue
941
+
942
+ if not 'detections' in im_b or im_b['detections'] is None:
943
+ assert 'failure' in im_b and im_b['failure'] is not None
944
+ continue
945
+
946
+ invalid_category_error = False
947
+
948
+ # det = im_a['detections'][0]
949
+ for det in im_a['detections']:
419
950
 
420
- conf = det['conf']
951
+ category_id = det['category']
952
+
953
+ if category_id not in category_id_to_threshold_a:
954
+ print('Warning: unexpected category {} for model A on file {}'.format(category_id,fn))
955
+ invalid_category_error = True
956
+ break
957
+
958
+ conf = det['conf']
959
+ conf_thresh = category_id_to_threshold_a[category_id]
960
+ if conf >= conf_thresh:
961
+ categories_above_threshold_a.add(category_id)
962
+
963
+ if invalid_category_error:
964
+ continue
421
965
 
422
- if detection_categories_a[category_id] in pairwise_options.detection_thresholds_a:
423
- conf_thresh = pairwise_options.detection_thresholds_a[detection_categories_a[category_id]]
424
- else:
425
- conf_thresh = pairwise_options.detection_thresholds_a['default']
966
+ categories_above_threshold_b = set()
967
+
968
+ for det in im_b['detections']:
426
969
 
427
- if conf >= conf_thresh:
428
- categories_above_threshold_a.add(category_id)
970
+ category_id = det['category']
971
+
972
+ if category_id not in category_id_to_threshold_b:
973
+ print('Warning: unexpected category {} for model B on file {}'.format(category_id,fn))
974
+ invalid_category_error = True
975
+ break
976
+
977
+ conf = det['conf']
978
+ conf_thresh = category_id_to_threshold_b[category_id]
979
+ if conf >= conf_thresh:
980
+ categories_above_threshold_b.add(category_id)
981
+
982
+ if invalid_category_error:
983
+
984
+ continue
985
+
986
+ detection_a = (len(categories_above_threshold_a) > 0)
987
+ detection_b = (len(categories_above_threshold_b) > 0)
429
988
 
430
- if invalid_category_error:
431
- continue
989
+ if detection_a and detection_b:
990
+ if (categories_above_threshold_a == categories_above_threshold_b) or \
991
+ options.class_agnostic_comparison:
992
+ comparison_category = 'common_detections'
993
+ else:
994
+ comparison_category = 'class_transitions'
995
+ elif (not detection_a) and (not detection_b):
996
+ comparison_category = 'common_non_detections'
997
+ elif detection_a and (not detection_b):
998
+ comparison_category = 'detections_a_only'
999
+ else:
1000
+ assert detection_b and (not detection_a)
1001
+ comparison_category = 'detections_b_only'
1002
+
1003
+ max_conf_a = _maxempty([det['conf'] for det in im_a['detections']])
1004
+ max_conf_b = _maxempty([det['conf'] for det in im_b['detections']])
1005
+
1006
+ # Only used if sort_by_confidence is True
1007
+ if comparison_category == 'common_detections':
1008
+ sort_conf = max(max_conf_a,max_conf_b)
1009
+ elif comparison_category == 'common_non_detections':
1010
+ sort_conf = max(max_conf_a,max_conf_b)
1011
+ elif comparison_category == 'detections_a_only':
1012
+ sort_conf = max_conf_a
1013
+ elif comparison_category == 'detections_b_only':
1014
+ sort_conf = max_conf_b
1015
+ elif comparison_category == 'class_transitions':
1016
+ sort_conf = max(max_conf_a,max_conf_b)
1017
+ else:
1018
+ print('Warning: unknown comparison category {}'.format(comparison_category))
1019
+ sort_conf = max(max_conf_a,max_conf_b)
432
1020
 
433
- categories_above_threshold_b = set()
1021
+ elif ground_truth_type == 'bbox_gt':
434
1022
 
435
- for det in im_b['detections']:
1023
def _boxes_match(det,gt_ann):
    """
    Test whether a detection and a ground truth annotation describe the same box.

    Reads [options], [gt_category_id_to_detection_category_id], and [get_iou] from
    the enclosing scope.

    Args:
        det (dict): a detection with a 'bbox' field (and a 'category' field, which is
            only consulted for class-sensitive comparisons)
        gt_ann (dict): a ground truth annotation; annotations without a 'bbox' field
            never match, and boxed annotations must also carry 'normalized_bbox'

    Returns:
        bool: True if the value returned by get_iou() for the two boxes is at least
        options.gt_iou_threshold (and, for class-sensitive comparisons, the detection
        category equals the detection category mapped from the annotation), else False
    """

    class_sensitive = (not options.class_agnostic_comparison)

    # For class-sensitive comparisons, boxes of different categories can't match
    if class_sensitive:
        det_category_id = det['category']
        expected_category_id = \
            gt_category_id_to_detection_category_id[gt_ann['category_id']]
        if det_category_id != expected_category_id:
            return False

    # Annotations without boxes can't be matched to anything
    if 'bbox' not in gt_ann:
        return False

    # NOTE(review): 'normalized_bbox' is presumably added to boxed annotations before
    # this point, in the same coordinate system as det['bbox']; confirm against caller.
    assert 'normalized_bbox' in gt_ann

    return get_iou(det['bbox'],gt_ann['normalized_bbox']) >= options.gt_iou_threshold
436
1040
 
437
- category_id = det['category']
1041
+ # ...def _boxes_match(...)
438
1042
 
439
- if category_id not in detection_categories_b:
440
- print('Warning: unexpected category {} for model B on file {}'.format(category_id,fn))
441
- invalid_category_error = True
442
- break
1043
+ # Categorize each model into TP/TN/FP/FN
1044
+ def _categorize_image_with_box_gt(im_detection,im_gt,annotations_gt,category_id_to_threshold):
443
1045
 
444
- conf = det['conf']
1046
+ annotations_gt = [ann for ann in annotations_gt if 'bbox' in ann]
1047
+
1048
+ assert im_detection['file'] == im_gt['file_name']
1049
+
1050
+ # List of result types - tn, tp, fp, fn - present in this image. tn is
1051
+ # mutually exclusive with the others.
1052
+ result_types_present = set()
1053
+
1054
+ # Find detections above threshold
1055
+ detections_above_threshold = []
1056
+
1057
+ # det = im_detection['detections'][0]
1058
+ for det in im_detection['detections']:
1059
+ category_id = det['category']
1060
+ threshold = category_id_to_threshold[category_id]
1061
+ if det['conf'] > threshold:
1062
+ detections_above_threshold.append(det)
1063
+
1064
+ if len(detections_above_threshold) == 0 and len(annotations_gt) == 0:
1065
+ result_types_present.add('tn')
1066
+ return result_types_present
1067
+
1068
+ # Look for a match for each detection
1069
+ #
1070
+ # det = detections_above_threshold[0]
1071
+ for det in detections_above_threshold:
1072
+
1073
+ det_matches_annotation = False
1074
+
1075
+ # gt_ann = annotations_gt[0]
1076
+ for gt_ann in annotations_gt:
1077
+ if _boxes_match(det, gt_ann):
1078
+ det_matches_annotation = True
1079
+ break
1080
+
1081
+ if det_matches_annotation:
1082
+ result_types_present.add('tp')
1083
+ else:
1084
+ result_types_present.add('fp')
1085
+
1086
+ # Look for a match for each GT bbox
1087
+ #
1088
+ # gt_ann = annotations_gt[0]
1089
+ for gt_ann in annotations_gt:
1090
+
1091
+ annotation_matches_det = False
1092
+
1093
+ for det in detections_above_threshold:
1094
+
1095
+ if _boxes_match(det, gt_ann):
1096
+ annotation_matches_det = True
1097
+ break
1098
+
1099
+ if annotation_matches_det:
1100
+ # We should have found this when we looped over detections
1101
+ assert 'tp' in result_types_present
1102
+ else:
1103
+ result_types_present.add('fn')
1104
+
1105
+ # ...for each above-threshold detection
1106
+
1107
+ return result_types_present
445
1108
 
446
- if detection_categories_b[category_id] in pairwise_options.detection_thresholds_b:
447
- conf_thresh = pairwise_options.detection_thresholds_b[detection_categories_b[category_id]]
448
- else:
449
- conf_thresh = pairwise_options.detection_thresholds_a['default']
1109
+ # ...def _categorize_image_with_box_gt(...)
1110
+
1111
+ # im_detection = im_a; category_id_to_threshold = category_id_to_threshold_a
1112
+ result_types_present_a = \
1113
+ _categorize_image_with_box_gt(im_a,im_gt,annotations_gt,category_id_to_threshold_a)
1114
+ result_types_present_b = \
1115
+ _categorize_image_with_box_gt(im_b,im_gt,annotations_gt,category_id_to_threshold_b)
1116
+
1117
+
1118
+ ## Some combinations are nonsense
450
1119
 
451
- if conf >= conf_thresh:
452
- categories_above_threshold_b.add(category_id)
453
-
454
- if invalid_category_error:
455
- continue
456
-
457
- im_pair = (im_a,im_b)
458
-
459
- detection_a = (len(categories_above_threshold_a) > 0)
460
- detection_b = (len(categories_above_threshold_b) > 0)
1120
+ # TNs are mutually exclusive with other categories
1121
+ if 'tn' in result_types_present_a or 'tn' in result_types_present_b:
1122
+ assert len(result_types_present_a) == 1
1123
+ assert len(result_types_present_b) == 1
1124
+
1125
+ # If either model has a TP or FN, the other has to have a TP or FN, since
1126
+ # there was something in the GT
1127
+ if ('tp' in result_types_present_a) or ('fn' in result_types_present_a):
1128
+ assert 'tp' in result_types_present_b or 'fn' in result_types_present_b
1129
+ if ('tp' in result_types_present_b) or ('fn' in result_types_present_b):
1130
+ assert 'tp' in result_types_present_a or 'fn' in result_types_present_a
461
1131
 
462
- if detection_a and detection_b:
463
- if (categories_above_threshold_a == categories_above_threshold_b) or \
464
- options.class_agnostic_comparison:
465
- common_detections[fn] = im_pair
466
- else:
467
- class_transitions[fn] = im_pair
468
- elif (not detection_a) and (not detection_b):
469
- common_non_detections[fn] = im_pair
470
- elif detection_a and (not detection_b):
471
- detections_a_only[fn] = im_pair
1132
+ # If either model has a TP or FN, the other has to have a TP or FN, since
1133
+ # there was something in the GT
1134
+ if ('tp' in result_types_present_a) or ('fn' in result_types_present_a):
1135
+ assert 'tp' in result_types_present_b or 'fn' in result_types_present_b
1136
+ if ('tp' in result_types_present_b) or ('fn' in result_types_present_b):
1137
+ assert 'tp' in result_types_present_a or 'fn' in result_types_present_a
1138
+
1139
+
1140
+ ## Choose a comparison category based on result types
1141
+
1142
+ comparison_category = _result_types_to_comparison_category(
1143
+ result_types_present_a,result_types_present_b,ground_truth_type,options)
1144
+
1145
+ # TODO: this may or may not be the right way to interpret sorting
1146
+ # by confidence in this case, e.g., we may want to sort by confidence
1147
+ # of correct or incorrect matches. But this isn't *wrong*.
1148
+ max_conf_a = _maxempty([det['conf'] for det in im_a['detections']])
1149
+ max_conf_b = _maxempty([det['conf'] for det in im_b['detections']])
1150
+ sort_conf = max(max_conf_a,max_conf_b)
1151
+
472
1152
  else:
473
- assert detection_b and (not detection_a)
474
- detections_b_only[fn] = im_pair
475
1153
 
476
- # ...for each filename
477
-
478
- print('Of {} files:\n{} common detections\n{} common non-detections\n{} A only\n{} B only\n{} class transitions'.format(
479
- len(filenames_to_compare),len(common_detections),
480
- len(common_non_detections),len(detections_a_only),
481
- len(detections_b_only),len(class_transitions)))
1154
+ # Categorize each model into TP/TN/FP/FN
1155
+ def _categorize_image_with_image_level_gt(im_detection,im_gt,annotations_gt,
1156
+ category_id_to_threshold):
1157
+
1158
+ assert im_detection['file'] == im_gt['file_name']
1159
+
1160
+ # List of result types - tn, tp, fp, fn - present in this image.
1161
+ result_types_present = set()
1162
+
1163
+ # Find detections above threshold
1164
+ category_names_detected = set()
1165
+
1166
+ # det = im_detection['detections'][0]
1167
+ for det in im_detection['detections']:
1168
+ category_id = det['category']
1169
+ threshold = category_id_to_threshold[category_id]
1170
+ if det['conf'] > threshold:
1171
+ category_name = detection_category_id_to_name[det['category']]
1172
+ category_names_detected.add(category_name)
1173
+
1174
+ category_names_in_gt = set()
1175
+
1176
+ # ann = annotations_gt[0]
1177
+ for ann in annotations_gt:
1178
+ category_name = gt_category_id_to_name[ann['category_id']]
1179
+ category_names_in_gt.add(category_name)
1180
+
1181
+ for category_name in category_names_detected:
1182
+
1183
+ if category_name in category_names_in_gt:
1184
+ result_types_present.add('tp')
1185
+ else:
1186
+ result_types_present.add('fp')
1187
+
1188
+ for category_name in category_names_in_gt:
1189
+
1190
+ # Is this an empty image?
1191
+ if category_name in options.gt_empty_categories:
1192
+
1193
+ assert all([cn in options.gt_empty_categories for cn in category_names_in_gt]), \
1194
+ 'Image {} has both empty and non-empty ground truth labels'.format(
1195
+ im_detection['file'])
1196
+ if len(category_names_detected) > 0:
1197
+ result_types_present.add('fp')
1198
+ # If there is a false positive present in an empty image, there can't
1199
+ # be any other result types present
1200
+ assert len(result_types_present) == 1
1201
+ else:
1202
+ result_types_present.add('tn')
1203
+
1204
+ elif category_name in category_names_detected:
1205
+
1206
+ assert 'tp' in result_types_present
1207
+
1208
+ else:
1209
+
1210
+ result_types_present.add('fn')
1211
+
1212
+ return result_types_present
1213
+
1214
+ # ...def _categorize_image_with_image_level_gt(...)
1215
+
1216
+ # if 'val#human#human#HoSa#2021.006_na#2021#2021.006 (2021)#20210713' in im_a['file']:
1217
+ # import pdb; pdb.set_trace()
1218
+
1219
+ # im_detection = im_a; category_id_to_threshold = category_id_to_threshold_a
1220
+ result_types_present_a = \
1221
+ _categorize_image_with_image_level_gt(im_a,im_gt,annotations_gt,category_id_to_threshold_a)
1222
+ result_types_present_b = \
1223
+ _categorize_image_with_image_level_gt(im_b,im_gt,annotations_gt,category_id_to_threshold_b)
1224
+
1225
+
1226
+ ## Some combinations are nonsense
1227
+
1228
+ # If either model has a TP or FN, the other has to have a TP or FN, since
1229
+ # there was something in the GT
1230
+ if ('tp' in result_types_present_a) or ('fn' in result_types_present_a):
1231
+ assert 'tp' in result_types_present_b or 'fn' in result_types_present_b
1232
+ if ('tp' in result_types_present_b) or ('fn' in result_types_present_b):
1233
+ assert 'tp' in result_types_present_a or 'fn' in result_types_present_a
1234
+
1235
+
1236
+ ## Choose a comparison category based on result types
1237
+
1238
+ comparison_category = _result_types_to_comparison_category(
1239
+ result_types_present_a,result_types_present_b,ground_truth_type,options)
1240
+
1241
+ # TODO: this may or may not be the right way to interpret sorting
1242
+ # by confidence in this case, e.g., we may want to sort by confidence
1243
+ # of correct or incorrect matches. But this isn't *wrong*.
1244
+ max_conf_a = _maxempty([det['conf'] for det in im_a['detections']])
1245
+ max_conf_b = _maxempty([det['conf'] for det in im_b['detections']])
1246
+ sort_conf = max(max_conf_a,max_conf_b)
1247
+
1248
+ # ...what kind of ground truth (if any) do we have?
482
1249
 
1250
+ assert comparison_category is not None
1251
+ categories_to_image_pairs[comparison_category][fn] = im_pair
1252
+ im_pair['sort_conf'] = sort_conf
1253
+
1254
+ # ...for each filename
1255
+
483
1256
 
484
1257
  ##%% Sample and plot differences
485
1258
 
@@ -493,22 +1266,6 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
493
1266
  else:
494
1267
  pool = Pool(options.n_rendering_workers)
495
1268
 
496
- categories_to_image_pairs = {
497
- 'common_detections':common_detections,
498
- 'common_non_detections':common_non_detections,
499
- 'detections_a_only':detections_a_only,
500
- 'detections_b_only':detections_b_only,
501
- 'class_transitions':class_transitions
502
- }
503
-
504
- categories_to_page_titles = {
505
- 'common_detections':'Detections common to both models',
506
- 'common_non_detections':'Non-detections common to both models',
507
- 'detections_a_only':'Detections reported by model A only',
508
- 'detections_b_only':'Detections reported by model B only',
509
- 'class_transitions':'Detections reported as different classes by models A and B'
510
- }
511
-
512
1269
  local_output_folder = os.path.join(options.output_folder,'cmp_' + \
513
1270
  str(output_index).zfill(3))
514
1271
 
@@ -537,6 +1294,17 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
537
1294
 
538
1295
  # ...def render_detection_comparisons()
539
1296
 
1297
+ if len(options.colormap_a) > 1:
1298
+ color_string_a = str(options.colormap_a)
1299
+ else:
1300
+ color_string_a = options.colormap_a[0]
1301
+
1302
+ if len(options.colormap_b) > 1:
1303
+ color_string_b = str(options.colormap_b)
1304
+ else:
1305
+ color_string_b = options.colormap_b[0]
1306
+
1307
+
540
1308
  # For each category, generate comparison images and the
541
1309
  # comparison HTML page.
542
1310
  #
@@ -575,34 +1343,23 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
575
1343
 
576
1344
  input_path_relative = image_filenames[i_fn]
577
1345
  image_pair = image_pairs[input_path_relative]
578
- assert len(image_pair) == 2; image_a = image_pair[0]; image_b = image_pair[1]
1346
+ image_a = image_pair['im_a']
1347
+ image_b = image_pair['im_b']
579
1348
 
580
- def maxempty(L):
581
- if len(L) == 0:
582
- return 0
583
- else:
584
- return max(L)
1349
+ if options.fn_to_display_fn is not None:
1350
+ assert input_path_relative in options.fn_to_display_fn, \
1351
+ 'fn_to_display_fn provided, but {} is not mapped'.format(input_path_relative)
1352
+ display_path = options.fn_to_display_fn[input_path_relative]
1353
+ else:
1354
+ display_path = input_path_relative
585
1355
 
586
- max_conf_a = maxempty([det['conf'] for det in image_a['detections']])
587
- max_conf_b = maxempty([det['conf'] for det in image_b['detections']])
1356
+ sort_conf = image_pair['sort_conf']
588
1357
 
589
- title = input_path_relative + ' (max conf {:.2f},{:.2f})'.format(max_conf_a,max_conf_b)
1358
+ max_conf_a = _maxempty([det['conf'] for det in image_a['detections']])
1359
+ max_conf_b = _maxempty([det['conf'] for det in image_b['detections']])
1360
+
1361
+ title = display_path + ' (max conf {:.2f},{:.2f})'.format(max_conf_a,max_conf_b)
590
1362
 
591
- # Only used if sort_by_confidence is True
592
- if category == 'common_detections':
593
- sort_conf = max(max_conf_a,max_conf_b)
594
- elif category == 'common_non_detections':
595
- sort_conf = max(max_conf_a,max_conf_b)
596
- elif category == 'detections_a_only':
597
- sort_conf = max_conf_a
598
- elif category == 'detections_b_only':
599
- sort_conf = max_conf_b
600
- elif category == 'class_transitions':
601
- sort_conf = max(max_conf_a,max_conf_b)
602
- else:
603
- print('Warning: unknown sort category {}'.format(category))
604
- sort_conf = max(max_conf_a,max_conf_b)
605
-
606
1363
  info = {
607
1364
  'filename': fn,
608
1365
  'title': title,
@@ -611,15 +1368,17 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
611
1368
  'linkTarget': urllib.parse.quote(input_image_absolute_paths[i_fn]),
612
1369
  'sort_conf':sort_conf
613
1370
  }
1371
+
614
1372
  image_info.append(info)
615
1373
 
616
1374
  # ...for each image
617
1375
 
618
- category_page_header_string = '<h1>{}</h1>'.format(categories_to_page_titles[category])
1376
+ category_page_header_string = '<h1>{}</h1>\n'.format(categories_to_page_titles[category])
619
1377
  category_page_header_string += '<p style="font-weight:bold;">\n'
620
- category_page_header_string += 'Model A: {}<br/>\n'.format(
621
- pairwise_options.results_description_a)
622
- category_page_header_string += 'Model B: {}'.format(pairwise_options.results_description_b)
1378
+ category_page_header_string += 'Model A: {} ({})<br/>\n'.format(
1379
+ pairwise_options.results_description_a,color_string_a)
1380
+ category_page_header_string += 'Model B: {} ({})'.format(
1381
+ pairwise_options.results_description_b,color_string_b)
623
1382
  category_page_header_string += '</p>\n'
624
1383
 
625
1384
  category_page_header_string += '<p>\n'
@@ -635,6 +1394,8 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
635
1394
  str(pairwise_options.rendering_confidence_threshold_b))
636
1395
  category_page_header_string += '</p>\n'
637
1396
 
1397
+ subpage_header_string = '\n'.join(category_page_header_string.split('\n')[1:])
1398
+
638
1399
  # Default to sorting by filename
639
1400
  if options.sort_by_confidence:
640
1401
  image_info = sorted(image_info, key=lambda d: d['sort_conf'], reverse=True)
@@ -646,6 +1407,7 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
646
1407
  images=image_info,
647
1408
  options={
648
1409
  'headerHtml': category_page_header_string,
1410
+ 'subPageHeaderHtml': subpage_header_string,
649
1411
  'maxFiguresPerHtmlFile': options.max_images_per_page
650
1412
  })
651
1413
 
@@ -656,8 +1418,9 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
656
1418
 
657
1419
  html_output_string = ''
658
1420
 
659
- html_output_string += '<p>Comparing <b>{}</b> (A, red) to <b>{}</b> (B, blue)</p>'.format(
660
- pairwise_options.results_description_a,pairwise_options.results_description_b)
1421
+ html_output_string += '<p>Comparing <b>{}</b> (A, {}) to <b>{}</b> (B, {})</p>'.format(
1422
+ pairwise_options.results_description_a,color_string_a.lower(),
1423
+ pairwise_options.results_description_b,color_string_b.lower())
661
1424
  html_output_string += '<div class="contentdiv">\n'
662
1425
  html_output_string += 'Detection thresholds for {}:\n{}<br/>'.format(
663
1426
  pairwise_options.results_description_a,
@@ -679,10 +1442,19 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
679
1442
 
680
1443
  html_output_string += '<br/>'
681
1444
 
682
- html_output_string += ('Of {} total files:<br/><br/><div style="margin-left:15px;">{} common detections<br/>{} common non-detections<br/>{} A only<br/>{} B only<br/>{} class transitions</div><br/>'.format(
683
- len(filenames_to_compare),len(common_detections),
684
- len(common_non_detections),len(detections_a_only),
685
- len(detections_b_only),len(class_transitions)))
1445
+ category_summary = ''
1446
+ for i_category,category_name in enumerate(categories_to_image_pairs):
1447
+ if i_category > 0:
1448
+ category_summary += '<br/>'
1449
+ category_summary += '{} {}'.format(
1450
+ len(categories_to_image_pairs[category_name]),
1451
+ category_name.replace('_',' '))
1452
+
1453
+ category_summary = \
1454
+ 'Of {} total files:<br/><br/><div style="margin-left:15px;">{}</div><br/>'.format(
1455
+ len(filenames_to_compare),category_summary)
1456
+
1457
+ html_output_string += category_summary
686
1458
 
687
1459
  html_output_string += 'Comparison pages:<br/><br/>\n'
688
1460
  html_output_string += '<div style="margin-left:15px;">\n'
@@ -738,6 +1510,7 @@ def compare_batch_results(options):
738
1510
  all_pairwise_results = []
739
1511
 
740
1512
  # i_comparison = 0; pairwise_options = pairwise_options_list[i_comparison]
1513
+
741
1514
  for i_comparison,pairwise_options in enumerate(pairwise_options_list):
742
1515
  print('Running comparison {} of {}'.format(i_comparison,n_comparisons))
743
1516
  pairwise_results = \
@@ -764,7 +1537,11 @@ def compare_batch_results(options):
764
1537
  return results
765
1538
 
766
1539
 
767
- def n_way_comparison(filenames,options,detection_thresholds=None,rendering_thresholds=None):
1540
+ def n_way_comparison(filenames,
1541
+ options,
1542
+ detection_thresholds=None,
1543
+ rendering_thresholds=None,
1544
+ model_names=None):
768
1545
  """
769
1546
  Performs N pairwise comparisons for the list of results files in [filenames], by generating
770
1547
  sets of pairwise options and calling compare_batch_results.
@@ -777,6 +1554,8 @@ def n_way_comparison(filenames,options,detection_thresholds=None,rendering_thres
777
1554
  as [filenames], or None to use sensible defaults
778
1555
  rendering_thresholds (list, optional): list of rendering thresholds with the same length
779
1556
  as [filenames], or None to use sensible defaults
1557
+ model_names (list, optional): list of model names to use in the output HTML file, with
1558
+ the same length as [filenames], or None to use sensible defaults
780
1559
 
781
1560
  Returns:
782
1561
  BatchComparisonResults: the results of this comparison task
@@ -784,13 +1563,19 @@ def n_way_comparison(filenames,options,detection_thresholds=None,rendering_thres
784
1563
 
785
1564
  if detection_thresholds is None:
786
1565
  detection_thresholds = [0.15] * len(filenames)
787
- assert len(detection_thresholds) == len(filenames)
1566
+ assert len(detection_thresholds) == len(filenames), \
1567
+ '[detection_thresholds] should be the same length as [filenames]'
788
1568
 
789
1569
  if rendering_thresholds is not None:
790
- assert len(rendering_thresholds) == len(detection_thresholds)
1570
+ assert len(rendering_thresholds) == len(filenames)
1571
+ '[rendering_thresholds] should be the same length as [filenames]'
791
1572
  else:
792
1573
  rendering_thresholds = [(x*0.6666) for x in detection_thresholds]
793
1574
 
1575
+ if model_names is not None:
1576
+ assert len(model_names) == len(filenames), \
1577
+ '[model_names] should be the same length as [filenames]'
1578
+
794
1579
  # Choose all pairwise combinations of the files in [filenames]
795
1580
  for i, j in itertools.combinations(list(range(0,len(filenames))),2):
796
1581
 
@@ -805,6 +1590,10 @@ def n_way_comparison(filenames,options,detection_thresholds=None,rendering_thres
805
1590
  pairwise_options.detection_thresholds_a = {'default':detection_thresholds[i]}
806
1591
  pairwise_options.detection_thresholds_b = {'default':detection_thresholds[j]}
807
1592
 
1593
+ if model_names is not None:
1594
+ pairwise_options.results_description_a = model_names[i]
1595
+ pairwise_options.results_description_b = model_names[j]
1596
+
808
1597
  options.pairwise_options.append(pairwise_options)
809
1598
 
810
1599
  return compare_batch_results(options)