megadetector 5.0.6__py3-none-any.whl → 5.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



Files changed (62)
  1. api/batch_processing/data_preparation/manage_local_batch.py +278 -197
  2. api/batch_processing/data_preparation/manage_video_batch.py +7 -2
  3. api/batch_processing/postprocessing/add_max_conf.py +1 -0
  4. api/batch_processing/postprocessing/compare_batch_results.py +110 -60
  5. api/batch_processing/postprocessing/load_api_results.py +55 -69
  6. api/batch_processing/postprocessing/md_to_labelme.py +1 -0
  7. api/batch_processing/postprocessing/postprocess_batch_results.py +158 -50
  8. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +625 -0
  9. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +71 -23
  10. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +1 -1
  11. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +222 -74
  12. api/batch_processing/postprocessing/subset_json_detector_output.py +132 -5
  13. api/batch_processing/postprocessing/top_folders_to_bottom.py +1 -1
  14. classification/prepare_classification_script.py +191 -191
  15. data_management/coco_to_yolo.py +65 -44
  16. data_management/databases/integrity_check_json_db.py +7 -5
  17. data_management/generate_crops_from_cct.py +1 -1
  18. data_management/importers/animl_results_to_md_results.py +2 -2
  19. data_management/importers/noaa_seals_2019.py +1 -1
  20. data_management/importers/zamba_results_to_md_results.py +2 -2
  21. data_management/labelme_to_coco.py +34 -6
  22. data_management/labelme_to_yolo.py +1 -1
  23. data_management/lila/create_lila_blank_set.py +474 -0
  24. data_management/lila/create_lila_test_set.py +2 -1
  25. data_management/lila/create_links_to_md_results_files.py +1 -1
  26. data_management/lila/download_lila_subset.py +46 -21
  27. data_management/lila/generate_lila_per_image_labels.py +23 -14
  28. data_management/lila/get_lila_annotation_counts.py +16 -10
  29. data_management/lila/lila_common.py +14 -11
  30. data_management/lila/test_lila_metadata_urls.py +116 -0
  31. data_management/resize_coco_dataset.py +12 -10
  32. data_management/yolo_output_to_md_output.py +40 -13
  33. data_management/yolo_to_coco.py +34 -21
  34. detection/process_video.py +36 -14
  35. detection/pytorch_detector.py +1 -1
  36. detection/run_detector.py +73 -18
  37. detection/run_detector_batch.py +104 -24
  38. detection/run_inference_with_yolov5_val.py +127 -26
  39. detection/run_tiled_inference.py +153 -43
  40. detection/video_utils.py +3 -1
  41. md_utils/ct_utils.py +79 -3
  42. md_utils/md_tests.py +253 -15
  43. md_utils/path_utils.py +129 -24
  44. md_utils/process_utils.py +26 -7
  45. md_utils/split_locations_into_train_val.py +215 -0
  46. md_utils/string_utils.py +10 -0
  47. md_utils/url_utils.py +0 -2
  48. md_utils/write_html_image_list.py +1 -0
  49. md_visualization/visualization_utils.py +17 -2
  50. md_visualization/visualize_db.py +8 -0
  51. md_visualization/visualize_detector_output.py +185 -104
  52. {megadetector-5.0.6.dist-info → megadetector-5.0.7.dist-info}/METADATA +2 -2
  53. {megadetector-5.0.6.dist-info → megadetector-5.0.7.dist-info}/RECORD +62 -58
  54. {megadetector-5.0.6.dist-info → megadetector-5.0.7.dist-info}/WHEEL +1 -1
  55. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +1 -1
  56. taxonomy_mapping/map_new_lila_datasets.py +43 -39
  57. taxonomy_mapping/prepare_lila_taxonomy_release.py +5 -2
  58. taxonomy_mapping/preview_lila_taxonomy.py +27 -27
  59. taxonomy_mapping/species_lookup.py +33 -13
  60. taxonomy_mapping/taxonomy_csv_checker.py +7 -5
  61. {megadetector-5.0.6.dist-info → megadetector-5.0.7.dist-info}/LICENSE +0 -0
  62. {megadetector-5.0.6.dist-info → megadetector-5.0.7.dist-info}/top_level.txt +0 -0
api/batch_processing/data_preparation/manage_video_batch.py

@@ -249,8 +249,12 @@ if False:
     import os
     import nbformat as nbf

-    input_py_file = os.path.expanduser(
-        '~/git/MegaDetector/api/batch_processing/data_preparation/manage_video_batch.py')
+    if os.name == 'nt':
+        git_base = r'c:\git'
+    else:
+        git_base = os.path.expanduser('~/git')
+
+    input_py_file = git_base + '/MegaDetector/api/batch_processing/data_preparation/manage_video_batch.py'
     assert os.path.isfile(input_py_file)
     output_ipynb_file = input_py_file.replace('.py','.ipynb')

@@ -320,3 +324,4 @@ while(True):
     write_code_cell(current_cell)

     nbf.write(nb,output_ipynb_file)
+
api/batch_processing/postprocessing/add_max_conf.py

@@ -61,3 +61,4 @@ def main():

 if __name__ == '__main__':
     main()
+
api/batch_processing/postprocessing/compare_batch_results.py

@@ -4,7 +4,7 @@
 #
 # Compare sets of batch results; typically used to compare:
 #
-# * MegaDetector versions
+# * Results from different MegaDetector versions
 # * Results before/after RDE
 # * Results with/without augmentation
 #
@@ -36,9 +36,6 @@ from md_utils import path_utils


 #%% Constants and support classes
-
-# We will confirm that this matches what we load from each file
-default_detection_categories = {'1': 'animal', '2': 'person', '3': 'vehicle'}

 class PairwiseBatchComparisonOptions:
     """
@@ -52,8 +49,8 @@ class PairwiseBatchComparisonOptions:
     results_description_a = None
     results_description_b = None

-    detection_thresholds_a = {'animal':0.15,'person':0.15,'vehicle':0.15}
-    detection_thresholds_b = {'animal':0.15,'person':0.15,'vehicle':0.15}
+    detection_thresholds_a = {'animal':0.15,'person':0.15,'vehicle':0.15,'default':0.15}
+    detection_thresholds_b = {'animal':0.15,'person':0.15,'vehicle':0.15,'default':0.15}

     rendering_confidence_threshold_a = 0.1
     rendering_confidence_threshold_b = 0.1
@@ -71,16 +68,26 @@ class BatchComparisonOptions:
     job_name = ''

     max_images_per_category = 1000
+    max_images_per_page = None
     colormap_a = ['Red']
     colormap_b = ['RoyalBlue']

     # Process-based parallelization isn't supported yet; this must be "True"
     parallelize_rendering_with_threads = True

+    # List of filenames to include in the comparison, or None to use all files
+    filenames_to_include = None
+
+    # Compare only detections/non-detections, ignore categories (still renders categories)
+    class_agnostic_comparison = False
+
     target_width = 800
     n_rendering_workers = 20
     random_seed = 0

+    # Default to sorting by filename
+    sort_by_confidence = False
+
     error_on_non_matching_lists = True

     pairwise_options = []
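To make the new options concrete, here is a minimal configuration sketch using the fields added to BatchComparisonOptions in this release; the import path is assumed from the package layout shown in the file list, and the values are purely illustrative.

    from api.batch_processing.postprocessing.compare_batch_results import BatchComparisonOptions

    options = BatchComparisonOptions()
    options.max_images_per_page = 500          # paginate long HTML category pages (None = one page)
    options.filenames_to_include = None        # or a list of result filenames to restrict the comparison
    options.class_agnostic_comparison = False  # True compares detection/non-detection only, ignoring categories
    options.sort_by_confidence = True          # sort rendered images by confidence rather than by filename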
@@ -90,7 +97,7 @@ class BatchComparisonOptions:

 class PairwiseBatchComparisonResults:
     """
-    The results from a single pairwise comparison
+    The results from a single pairwise comparison.
     """

     html_content = None
@@ -98,7 +105,7 @@ class PairwiseBatchComparisonResults:

     # A dictionary with keys including:
     #
-    # "common_detections"
+    # common_detections
     # common_non_detections
     # detections_a_only
     # detections_b_only
@@ -207,7 +214,8 @@ def pairwise_compare_batch_results(options,output_index,pairwise_options):
     # in the options object.
     assert options.pairwise_options is None

-    random.seed(options.random_seed)
+    if options.random_seed is not None:
+        random.seed(options.random_seed)

     # Warn the user if some "detections" might not get rendered
     max_classification_threshold_a = max(list(pairwise_options.detection_thresholds_a.values()))
@@ -241,10 +249,20 @@ def pairwise_compare_batch_results(options,output_index,pairwise_options):
     with open(pairwise_options.results_filename_b,'r') as f:
         results_b = json.load(f)

-    # assert results_a['detection_categories'] == default_detection_categories
-    # assert results_b['detection_categories'] == default_detection_categories
-    assert results_a['detection_categories'] == results_b['detection_categories']
-    detection_categories = results_a['detection_categories']
+    # Don't let path separators confuse things
+    for im in results_a['images']:
+        if 'file' in im:
+            im['file'] = im['file'].replace('\\','/')
+    for im in results_b['images']:
+        if 'file' in im:
+            im['file'] = im['file'].replace('\\','/')
+
+    if not options.class_agnostic_comparison:
+        assert results_a['detection_categories'] == results_b['detection_categories'], \
+            "Cannot perform a class-sensitive comparison across results with different categories"
+
+    detection_categories_a = results_a['detection_categories']
+    detection_categories_b = results_b['detection_categories']

     if pairwise_options.results_description_a is None:
         if 'detector' not in results_a['info']:
@@ -286,6 +304,10 @@ def pairwise_compare_batch_results(options,output_index,pairwise_options):
     assert len(filenames_a) == len(images_a)
     assert len(filenames_b_set) == len(images_b)

+    if options.filenames_to_include is None:
+        filenames_to_compare = filenames_a
+    else:
+        filenames_to_compare = options.filenames_to_include

     ##%% Find differences

@@ -298,9 +320,9 @@ def pairwise_compare_batch_results(options,output_index,pairwise_options):
     detections_a_only = {}
     detections_b_only = {}
     class_transitions = {}
-
-    # fn = filenames_a[0]
-    for fn in tqdm(filenames_a):
+
+    # fn = filenames_to_compare[0]
+    for fn in tqdm(filenames_to_compare):

         if fn not in filename_to_image_b:

@@ -330,14 +352,19 @@ def pairwise_compare_batch_results(options,output_index,pairwise_options):

             category_id = det['category']

-            if category_id not in detection_categories:
+            if category_id not in detection_categories_a:
                 print('Warning: unexpected category {} for model A on file {}'.format(category_id,fn))
                 invalid_category_error = True
                 break

             conf = det['conf']

-            if conf >= pairwise_options.detection_thresholds_a[detection_categories[category_id]]:
+            if detection_categories_a[category_id] in pairwise_options.detection_thresholds_a:
+                conf_thresh = pairwise_options.detection_thresholds_a[detection_categories_a[category_id]]
+            else:
+                conf_thresh = pairwise_options.detection_thresholds_a['default']
+
+            if conf >= conf_thresh:
                 categories_above_threshold_a.add(category_id)

         if invalid_category_error:
@@ -349,14 +376,19 @@ def pairwise_compare_batch_results(options,output_index,pairwise_options):

             category_id = det['category']

-            if category_id not in detection_categories:
+            if category_id not in detection_categories_b:
                 print('Warning: unexpected category {} for model B on file {}'.format(category_id,fn))
                 invalid_category_error = True
                 break

             conf = det['conf']

-            if conf >= pairwise_options.detection_thresholds_b[detection_categories[category_id]]:
+            if detection_categories_b[category_id] in pairwise_options.detection_thresholds_b:
+                conf_thresh = pairwise_options.detection_thresholds_b[detection_categories_b[category_id]]
+            else:
+                conf_thresh = pairwise_options.detection_thresholds_b['default']
+
+            if conf >= conf_thresh:
                 categories_above_threshold_b.add(category_id)

         if invalid_category_error:
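For clarity, the per-category threshold lookup introduced above can be summarized in a tiny standalone sketch: a category-specific threshold is used when present, otherwise the 'default' entry applies. The helper name and sample values below are illustrative, not part of the package.

    def get_detection_threshold(category_name, detection_thresholds):
        # Mirror of the fallback logic in pairwise_compare_batch_results
        if category_name in detection_thresholds:
            return detection_thresholds[category_name]
        return detection_thresholds['default']

    thresholds = {'animal': 0.15, 'person': 0.2, 'default': 0.1}
    assert get_detection_threshold('person', thresholds) == 0.2
    assert get_detection_threshold('vehicle', thresholds) == 0.1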
@@ -368,7 +400,8 @@ def pairwise_compare_batch_results(options,output_index,pairwise_options):
         detection_b = (len(categories_above_threshold_b) > 0)

         if detection_a and detection_b:
-            if categories_above_threshold_a == categories_above_threshold_b:
+            if (categories_above_threshold_a == categories_above_threshold_b) or \
+                options.class_agnostic_comparison:
                 common_detections[fn] = im_pair
             else:
                 class_transitions[fn] = im_pair
@@ -383,7 +416,7 @@ def pairwise_compare_batch_results(options,output_index,pairwise_options):
     # ...for each filename

     print('Of {} files:\n{} common detections\n{} common non-detections\n{} A only\n{} B only\n{} class transitions'.format(
-        len(filenames_a),len(common_detections),
+        len(filenames_to_compare),len(common_detections),
         len(common_non_detections),len(detections_a_only),
         len(detections_b_only),len(class_transitions)))

@@ -453,14 +486,16 @@ def pairwise_compare_batch_results(options,output_index,pairwise_options):
         # Choose detection pairs we're going to render for this category
         image_pairs = categories_to_image_pairs[category]
         image_filenames = list(image_pairs.keys())
-        if len(image_filenames) > options.max_images_per_category:
-            print('Sampling {} of {} image pairs for category {}'.format(
-                options.max_images_per_category,
-                len(image_filenames),
-                category))
-            image_filenames = random.sample(image_filenames,
-                options.max_images_per_category)
-        assert len(image_filenames) <= options.max_images_per_category
+
+        if options.max_images_per_category is not None and options.max_images_per_category > 0:
+            if len(image_filenames) > options.max_images_per_category:
+                print('Sampling {} of {} image pairs for category {}'.format(
+                    options.max_images_per_category,
+                    len(image_filenames),
+                    category))
+                image_filenames = random.sample(image_filenames,
+                    options.max_images_per_category)
+            assert len(image_filenames) <= options.max_images_per_category

         input_image_absolute_paths = [os.path.join(options.image_folder,fn) for fn in image_filenames]

@@ -492,15 +527,34 @@ def pairwise_compare_batch_results(options,output_index,pairwise_options):
             max_conf_b = maxempty([det['conf'] for det in image_b['detections']])

             title = input_path_relative + ' (max conf {:.2f},{:.2f})'.format(max_conf_a,max_conf_b)
+
+            # Only used if sort_by_confidence is True
+            if category == 'common_detections':
+                sort_conf = max(max_conf_a,max_conf_b)
+            elif category == 'common_non_detections':
+                sort_conf = max(max_conf_a,max_conf_b)
+            elif category == 'detections_a_only':
+                sort_conf = max_conf_a
+            elif category == 'detections_b_only':
+                sort_conf = max_conf_b
+            elif category == 'class_transitions':
+                sort_conf = max(max_conf_a,max_conf_b)
+            else:
+                print('Warning: unknown sort category {}'.format(category))
+                sort_conf = max(max_conf_a,max_conf_b)
+
             info = {
                 'filename': fn,
                 'title': title,
                 'textStyle': 'font-family:verdana,arial,calibri;font-size:' + \
                     '80%;text-align:left;margin-top:20;margin-bottom:5',
-                'linkTarget': urllib.parse.quote(input_image_absolute_paths[i_fn])
+                'linkTarget': urllib.parse.quote(input_image_absolute_paths[i_fn]),
+                'sort_conf':sort_conf
             }
             image_info.append(info)

+        # ...for each image
+
         category_page_header_string = '<h1>{}</h1>'.format(categories_to_page_titles[category])
         category_page_header_string += '<p style="font-weight:bold;">\n'
         category_page_header_string += 'Model A: {}<br/>\n'.format(
@@ -521,11 +575,18 @@ def pairwise_compare_batch_results(options,output_index,pairwise_options):
             str(pairwise_options.rendering_confidence_threshold_b))
         category_page_header_string += '</p>\n'

+        # Default to sorting by filename
+        if options.sort_by_confidence:
+            image_info = sorted(image_info, key=lambda d: d['sort_conf'], reverse=True)
+        else:
+            image_info = sorted(image_info, key=lambda d: d['filename'])
+
         write_html_image_list(
             category_html_filename,
             images=image_info,
             options={
-                'headerHtml': category_page_header_string
+                'headerHtml': category_page_header_string,
+                'maxFiguresPerHtmlFile': options.max_images_per_page
             })

     # ...for each category
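As a usage sketch of the rendering path above: image entries are sorted (by confidence when sort_by_confidence is set) and handed to write_html_image_list together with the new 'maxFiguresPerHtmlFile' option. The import path and the sample entries are assumptions for illustration; the call pattern mirrors the hunk above.

    from md_utils.write_html_image_list import write_html_image_list

    image_info = [
        {'filename': 'a.jpg', 'title': 'a.jpg (max conf 0.91,0.88)', 'sort_conf': 0.91},
        {'filename': 'b.jpg', 'title': 'b.jpg (max conf 0.42,0.40)', 'sort_conf': 0.42}
    ]

    # Sort by confidence (descending), as the comparison code now does when
    # sort_by_confidence is enabled
    image_info = sorted(image_info, key=lambda d: d['sort_conf'], reverse=True)

    write_html_image_list('comparison_page.html',
                          images=image_info,
                          options={'headerHtml': '<h1>Common detections</h1>',
                                   'maxFiguresPerHtmlFile': 500})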
@@ -559,7 +620,7 @@ def pairwise_compare_batch_results(options,output_index,pairwise_options):
     html_output_string += '<br/>'

     html_output_string += ('Of {} total files:<br/><br/><div style="margin-left:15px;">{} common detections<br/>{} common non-detections<br/>{} A only<br/>{} B only<br/>{} class transitions</div><br/>'.format(
-        len(filenames_a),len(common_detections),
+        len(filenames_to_compare),len(common_detections),
         len(common_non_detections),len(detections_a_only),
         len(detections_b_only),len(class_transitions)))

@@ -583,7 +644,7 @@ def pairwise_compare_batch_results(options,output_index,pairwise_options):

     return pairwise_results

-# ...def compare_batch_results()
+# ...def pairwise_compare_batch_results()


 def compare_batch_results(options):
@@ -663,12 +724,9 @@ def n_way_comparison(filenames,options,detection_thresholds=None,rendering_thres
             pairwise_options.rendering_confidence_threshold_a = rendering_thresholds[i]
             pairwise_options.rendering_confidence_threshold_b = rendering_thresholds[j]

-            pairwise_options.detection_thresholds_a = {'animal':detection_thresholds[i],
-                                                       'person':detection_thresholds[i],
-                                                       'vehicle':detection_thresholds[i]}
-            pairwise_options.detection_thresholds_b = {'animal':detection_thresholds[j],
-                                                       'person':detection_thresholds[j],
-                                                       'vehicle':detection_thresholds[j]}
+            pairwise_options.detection_thresholds_a = {'default':detection_thresholds[i]}
+            pairwise_options.detection_thresholds_b = {'default':detection_thresholds[j]}
+
             options.pairwise_options.append(pairwise_options)

     return compare_batch_results(options)
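A hedged sketch of how the simplified 'default' thresholds flow through n_way_comparison, based only on the signature and fields visible in this diff; the file paths and folder names are placeholders.

    from api.batch_processing.postprocessing.compare_batch_results import (
        BatchComparisonOptions, n_way_comparison)

    options = BatchComparisonOptions()
    options.job_name = 'comparison-test'
    options.output_folder = '/tmp/md-comparison'
    options.image_folder = '/data/camera-trap-images'

    # One results file per model/configuration being compared
    filenames = ['/results/md-v5a.json', '/results/md-v5b.json']

    # n_way_comparison builds one PairwiseBatchComparisonOptions per pair, using a
    # single 'default' detection threshold per results file
    results = n_way_comparison(filenames,
                               options,
                               detection_thresholds=[0.15, 0.15],
                               rendering_thresholds=[0.1, 0.1])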
@@ -679,32 +737,25 @@ def n_way_comparison(filenames,options,detection_thresholds=None,rendering_thres

 #%% Interactive driver

 if False:
-
-    #%% Running KGA test
-
-    # CUDA_VISIBLE_DEVICES=0 python run_detector_batch.py ~/models/camera_traps/megadetector/md_v5.0.0/md_v5a.0.0.pt ~/data/KGA/ ~/data/KGA-5a.json --recursive --output_relative_filenames --quiet
-    # CUDA_VISIBLE_DEVICES=1 python run_detector_batch.py ~/models/camera_traps/megadetector/md_v5.0.0/md_v5b.0.0.pt ~/data/KGA/ ~/data/KGA-5b.json --recursive --output_relative_filenames --quiet
-
-    # python run_detector_batch.py ~/models/camera_traps/megadetector/md_v4.1.0/md_v4.1.0.pb ~/data/KGA ~/data/KGA-4.json --recursive --output_relative_filenames --quiet
-
-    # CUDA_VISIBLE_DEVICES=0 python run_detector_batch.py ~/models/camera_traps/megadetector/md_v5.0.0/md_v5a.0.0.pt ~/data/KGA/ ~/data/KGA-5a-pillow-9.2.0.json --recursive --output_relative_filenames --quiet
-
-
+
     #%% Test two-way comparison

     options = BatchComparisonOptions()

-    options.parallelize_rendering_with_threads = False
+    options.parallelize_rendering_with_threads = True
+
+    options.job_name = 'BCT'
+    options.output_folder = r'g:\temp\comparisons'
+    options.image_folder = r'g:\camera_traps\camera_trap_images'
+    options.max_images_per_category = 100
+    options.sort_by_confidence = True

-    options.job_name = 'KGA-test'
-    options.output_folder = os.path.expanduser('~/tmp/md-comparison-test')
-    options.image_folder = os.path.expanduser('~/data/KGA')
-
     options.pairwise_options = []

-    filenames = [
-        os.path.expanduser('~/data/KGA-5a.json'),
-        os.path.expanduser('~/data/KGA-5b.json')
+    results_base = os.path.expanduser('~/postprocessing/bellevue-camera-traps')
+    filenames = [
+        os.path.join(results_base,r'bellevue-camera-traps-2023-12-05-v5a.0.0\combined_api_outputs\bellevue-camera-traps-2023-12-05-v5a.0.0_detections.json'),
+        os.path.join(results_base,r'bellevue-camera-traps-2023-12-05-aug-v5a.0.0\combined_api_outputs\bellevue-camera-traps-2023-12-05-aug-v5a.0.0_detections.json')
     ]

     detection_thresholds = [0.15,0.15]
@@ -835,4 +886,3 @@ def main():
 if __name__ == '__main__':

     main()
-
api/batch_processing/postprocessing/load_api_results.py

@@ -2,17 +2,18 @@
 #
 # load_api_results.py
 #
-# Loads the output of the batch processing API (json) into a pandas dataframe.
+# DEPRECATED
 #
-# Also functions to group entries by seq_id.
+# As of 2023.12, this module is used in postprocessing and RDE. Not recommended
+# for new code.
 #
-# Includes the deprecated functions that worked with the old CSV API output format.
+# Loads the output of the batch processing API (json) into a Pandas dataframe.
+#
+# Includes functions to read/write the (very very old) .csv results format.
 #
 ########

-#%% Constants and imports
-
-from collections import defaultdict
+#%% Imports

 import json
 import os
@@ -23,72 +24,32 @@ import pandas as pd

 from md_utils import ct_utils

-headers = ['image_path', 'max_confidence', 'detections']
-
-
-#%% Functions for grouping by sequence_id
-
-def ss_file_to_file_name(f):
-    # example
-    # input 'file': 'SER/S1/F08/F08_R3/S1_F08_R3_PICT1150.JPG'
-    # output 'id': 'S1/F08/F08_R3/S1_F08_R3_PICT1150.JPG'
-    return f.split('SER/')[1].split('.JPG')[0]
-
-
-def caltech_file_to_file_name(f):
-    return f.split('cct_images/')[1].split('.')[0]

-
-def api_results_groupby(api_output_path, gt_db_indexed, file_to_image_id, field='seq_id'):
-    """
-    Given the output file of the API, groupby (currently only seq_id).
-
-    Args:
-        api_output_path: path to the API output json file
-        gt_db_indexed: an instance of IndexedJsonDb so we know the seq_id to image_id mapping
-        file_to_image_id: a function that takes in the 'file' field in 'images' in the detector
-            output file and converts it to the 'id' field in the gt DB.
-        field: which field in the 'images' array to group by
-
-    Returns:
-        A dict where the keys are of the field requested, each points to an array
-        containing entries in the 'images' section of the output file
-    """
-
-    with open(api_output_path) as f:
-        detection_results = json.load(f)
-
-    res = defaultdict(list)
-    for i in detection_results['images']:
-        image_id = file_to_image_id(i['file'])
-        field_val = gt_db_indexed.image_id_to_image[image_id][field]
-        res[field_val].append(i)
-    return res
-
-
-#%% Functions for loading the result as a Pandas DataFrame
+#%% Functions for loading .json results into a Pandas DataFrame, and writing back to .json

 def load_api_results(api_output_path: str, normalize_paths: bool = True,
-                     filename_replacements: Optional[Mapping[str, str]] = None
+                     filename_replacements: Optional[Mapping[str, str]] = None,
+                     force_forward_slashes: bool = True
                      ) -> Tuple[pd.DataFrame, Dict]:
     """
-    Loads the json formatted results from the batch processing API to a
-    Pandas DataFrame, mainly useful for various postprocessing functions.
+    Loads json-formatted MegaDetector results to a Pandas DataFrame.

     Args:
-        api_output_path: path to the API output json file
+        api_output_path: path to the output json file
         normalize_paths: whether to apply os.path.normpath to the 'file' field
            in each image entry in the output file
         filename_replacements: replace some path tokens to match local paths to
           the original blob structure
+        force_forward_slashes: whether to convert backslashes to forward slashes
+            in filenames

     Returns:
        detection_results: pd.DataFrame, contains at least the columns:
-            ['file', 'detections','failure']
+           ['file', 'detections','failure']
        other_fields: a dict containing fields in the results other than 'images'
     """

-    print('Loading API results from {}'.format(api_output_path))
+    print('Loading results from {}'.format(api_output_path))

     with open(api_output_path) as f:
         detection_results = json.load(f)
@@ -97,7 +58,7 @@ def load_api_results(api_output_path: str, normalize_paths: bool = True,
     for s in ['info', 'detection_categories', 'images']:
         assert s in detection_results, 'Missing field {} in detection results'.format(s)

-    # Fields in the API output json other than 'images'
+    # Fields in the output json other than 'images'
     other_fields = {}
     for k, v in detection_results.items():
         if k != 'images':
@@ -109,6 +70,10 @@ def load_api_results(api_output_path: str, normalize_paths: bool = True,
             image['file'] = os.path.normpath(image['file'])
             # image['file'] = image['file'].replace('\\','/')

+    if force_forward_slashes:
+        for image in detection_results['images']:
+            image['file'] = image['file'].replace('\\','/')
+
     # Replace some path tokens to match local paths to original blob structure
     if filename_replacements is not None:
         for string_to_replace in filename_replacements.keys():
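A brief usage sketch for the new force_forward_slashes parameter shown above; the import path follows the package layout in the file list, and the results path is a placeholder.

    from api.batch_processing.postprocessing.load_api_results import load_api_results

    # Load MegaDetector results into a DataFrame, normalizing all filenames to
    # forward slashes (the new default behavior)
    detections_df, other_fields = load_api_results('/path/to/md_results.json',
                                                   normalize_paths=True,
                                                   force_forward_slashes=True)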
@@ -127,9 +92,7 @@ def load_api_results(api_output_path: str, normalize_paths: bool = True,
     # Pack the json output into a Pandas DataFrame
     detection_results = pd.DataFrame(detection_results['images'])

-
-
-    print('Finished loading API results for {} images from {}'.format(
+    print('Finished loading MegaDetector results for {} images from {}'.format(
         len(detection_results),api_output_path))

     return detection_results, other_fields
@@ -137,7 +100,7 @@ def load_api_results(api_output_path: str, normalize_paths: bool = True,

 def write_api_results(detection_results_table, other_fields, out_path):
     """
-    Writes a Pandas DataFrame back to a json that is compatible with the API output format.
+    Writes a Pandas DataFrame to the MegaDetector .json format.
     """

     print('Writing detection results to {}'.format(out_path))
@@ -148,6 +111,27 @@ def write_api_results(detection_results_table, other_fields, out_path):
         double_precision=3)
     images = json.loads(images)
     fields['images'] = images
+
+    # Convert the 'version' field back to a string as per format convention
+    try:
+        version = other_fields['info']['format_version']
+        if not isinstance(version,str):
+            other_fields['info']['format_version'] = str(version)
+    except Exception:
+        print('Warning: error determining format version')
+        pass
+
+    # Remove 'max_detection_conf' as per newer file convention (format >= v1.3)
+    try:
+        version = other_fields['info']['format_version']
+        version = float(version)
+        if version >= 1.3:
+            for im in images:
+                if 'max_detection_conf' in im:
+                    del im['max_detection_conf']
+    except Exception:
+        print('Warning: error removing max_detection_conf from output')
+        pass

     with open(out_path, 'w') as f:
         json.dump(fields, f, indent=1)
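To show where the new format_version and max_detection_conf handling applies, here is a hedged load/modify/write round-trip sketch using the two functions in this module; the paths are placeholders.

    from api.batch_processing.postprocessing.load_api_results import (
        load_api_results, write_api_results)

    df, other_fields = load_api_results('/path/to/md_results.json')

    # ...filter or otherwise modify the DataFrame here...

    # write_api_results now coerces info['format_version'] back to a string and, for
    # format versions >= 1.3, strips the legacy per-image 'max_detection_conf' field
    write_api_results(df, other_fields, '/path/to/md_results_modified.json')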
@@ -157,15 +141,16 @@ def write_api_results(detection_results_table, other_fields, out_path):

 def load_api_results_csv(filename, normalize_paths=True, filename_replacements={}, nrows=None):
     """
-    DEPRECATED
-    Loads .csv-formatted results from the batch processing API to a pandas table
+    [DEPRECATED]
+
+    Loads .csv-formatted MegaDetector results to a pandas table
     """

-    print('Loading API results from {}'.format(filename))
+    print('Loading MegaDetector results from {}'.format(filename))

     detection_results = pd.read_csv(filename,nrows=nrows)

-    print('De-serializing API results from {}'.format(filename))
+    print('De-serializing MegaDetector results from {}'.format(filename))

     # Confirm that this is really a detector output file
     for s in ['image_path','max_confidence','detections']:
@@ -191,17 +176,18 @@ def load_api_results_csv(filename, normalize_paths=True, filename_replacements={
                 fn = fn.replace(string_to_replace,replacement_string)
             detection_results.at[iRow,'image_path'] = fn

-    print('Finished loading and de-serializing API results for {} images from {}'.format(
+    print('Finished loading and de-serializing MD results for {} images from {}'.format(
         len(detection_results),filename))

     return detection_results


 def write_api_results_csv(detection_results, filename):
-     """
-     DEPRECATED
-     Writes a pandas table to csv in a way that's compatible with the .csv API output
-     format. Currently just a wrapper around to_csv that just forces output writing
+    """
+    [DEPRECATED]
+
+    Writes a Pandas table to csv in a way that's compatible with the .csv output
+    format. Currently just a wrapper around to_csv that forces output writing
     to go through a common code path.
     """

api/batch_processing/postprocessing/md_to_labelme.py

@@ -48,6 +48,7 @@ def get_labelme_dict_for_image(im,image_base_name,category_id_to_name,info=None,
     output_dict['imageHeight'] = im['height']
     output_dict['imageWidth'] = im['width']
     output_dict['imageData'] = None
+    output_dict['detections'] = im['detections']

     for det in im['detections']:
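For context, a sketch of the labelme-style dictionary this change augments: the original MegaDetector detections are now carried through under a 'detections' key alongside the standard labelme fields. The surrounding keys and values below are illustrative assumptions, not a complete listing of the real output.

    # Illustrative structure only; values are placeholders
    output_dict = {
        'imageHeight': 1080,
        'imageWidth': 1920,
        'imageData': None,
        # New in 5.0.7: the MD detections are preserved in the labelme dict,
        # in addition to being converted to labelme shapes
        'detections': [
            {'category': '1', 'conf': 0.94, 'bbox': [0.1, 0.2, 0.3, 0.4]}
        ]
    }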