megadetector 5.0.6__py3-none-any.whl → 5.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (75)
  1. api/batch_processing/data_preparation/manage_local_batch.py +297 -202
  2. api/batch_processing/data_preparation/manage_video_batch.py +7 -2
  3. api/batch_processing/postprocessing/add_max_conf.py +1 -0
  4. api/batch_processing/postprocessing/combine_api_outputs.py +2 -2
  5. api/batch_processing/postprocessing/compare_batch_results.py +111 -61
  6. api/batch_processing/postprocessing/convert_output_format.py +24 -6
  7. api/batch_processing/postprocessing/load_api_results.py +56 -72
  8. api/batch_processing/postprocessing/md_to_labelme.py +119 -51
  9. api/batch_processing/postprocessing/merge_detections.py +30 -5
  10. api/batch_processing/postprocessing/postprocess_batch_results.py +175 -55
  11. api/batch_processing/postprocessing/remap_detection_categories.py +163 -0
  12. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +628 -0
  13. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +71 -23
  14. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +1 -1
  15. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +224 -76
  16. api/batch_processing/postprocessing/subset_json_detector_output.py +132 -5
  17. api/batch_processing/postprocessing/top_folders_to_bottom.py +1 -1
  18. classification/prepare_classification_script.py +191 -191
  19. data_management/cct_json_utils.py +7 -2
  20. data_management/coco_to_labelme.py +263 -0
  21. data_management/coco_to_yolo.py +72 -48
  22. data_management/databases/integrity_check_json_db.py +75 -64
  23. data_management/databases/subset_json_db.py +1 -1
  24. data_management/generate_crops_from_cct.py +1 -1
  25. data_management/get_image_sizes.py +44 -26
  26. data_management/importers/animl_results_to_md_results.py +3 -5
  27. data_management/importers/noaa_seals_2019.py +2 -2
  28. data_management/importers/zamba_results_to_md_results.py +2 -2
  29. data_management/labelme_to_coco.py +264 -127
  30. data_management/labelme_to_yolo.py +96 -53
  31. data_management/lila/create_lila_blank_set.py +557 -0
  32. data_management/lila/create_lila_test_set.py +2 -1
  33. data_management/lila/create_links_to_md_results_files.py +1 -1
  34. data_management/lila/download_lila_subset.py +138 -45
  35. data_management/lila/generate_lila_per_image_labels.py +23 -14
  36. data_management/lila/get_lila_annotation_counts.py +16 -10
  37. data_management/lila/lila_common.py +15 -42
  38. data_management/lila/test_lila_metadata_urls.py +116 -0
  39. data_management/read_exif.py +65 -16
  40. data_management/remap_coco_categories.py +84 -0
  41. data_management/resize_coco_dataset.py +14 -31
  42. data_management/wi_download_csv_to_coco.py +239 -0
  43. data_management/yolo_output_to_md_output.py +40 -13
  44. data_management/yolo_to_coco.py +313 -100
  45. detection/process_video.py +36 -14
  46. detection/pytorch_detector.py +1 -1
  47. detection/run_detector.py +73 -18
  48. detection/run_detector_batch.py +116 -27
  49. detection/run_inference_with_yolov5_val.py +135 -27
  50. detection/run_tiled_inference.py +153 -43
  51. detection/tf_detector.py +2 -1
  52. detection/video_utils.py +4 -2
  53. md_utils/ct_utils.py +101 -6
  54. md_utils/md_tests.py +264 -17
  55. md_utils/path_utils.py +326 -47
  56. md_utils/process_utils.py +26 -7
  57. md_utils/split_locations_into_train_val.py +215 -0
  58. md_utils/string_utils.py +10 -0
  59. md_utils/url_utils.py +66 -3
  60. md_utils/write_html_image_list.py +12 -2
  61. md_visualization/visualization_utils.py +380 -74
  62. md_visualization/visualize_db.py +41 -10
  63. md_visualization/visualize_detector_output.py +185 -104
  64. {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/METADATA +11 -13
  65. {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/RECORD +74 -67
  66. {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/WHEEL +1 -1
  67. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +1 -1
  68. taxonomy_mapping/map_new_lila_datasets.py +43 -39
  69. taxonomy_mapping/prepare_lila_taxonomy_release.py +5 -2
  70. taxonomy_mapping/preview_lila_taxonomy.py +27 -27
  71. taxonomy_mapping/species_lookup.py +33 -13
  72. taxonomy_mapping/taxonomy_csv_checker.py +7 -5
  73. md_visualization/visualize_megadb.py +0 -183
  74. {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/LICENSE +0 -0
  75. {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/top_level.txt +0 -0
@@ -29,6 +29,7 @@ from tqdm import tqdm
29
29
 
30
30
  from detection.run_inference_with_yolov5_val import YoloInferenceOptions,run_inference_with_yolo_val
31
31
  from detection.run_detector_batch import load_and_run_detector_batch,write_results_to_file
32
+ from detection.run_detector import try_download_known_detector
32
33
 
33
34
  import torch
34
35
  from torchvision import ops
@@ -234,7 +235,7 @@ def in_place_nms(md_results, iou_thres=0.45, verbose=True):
234
235
  # i_image = 18; im = md_results['images'][i_image]
235
236
  for i_image,im in tqdm(enumerate(md_results['images']),total=len(md_results['images'])):
236
237
 
237
- if len(im['detections']) == 0:
238
+ if (im['detections'] is None) or (len(im['detections']) == 0):
238
239
  continue
239
240
 
240
241
  boxes = []
@@ -282,40 +283,52 @@ def in_place_nms(md_results, iou_thres=0.45, verbose=True):
282
283
 
283
284
  def _extract_tiles_for_image(fn_relative,image_folder,tiling_folder,patch_size,patch_stride,overwrite):
284
285
  """
285
- Extract tiles for a single image
286
+ Private function to extract tiles for a single image.
286
287
 
287
- Not really a standalone function; isolated from the main function to simplify
288
- multiprocessing.
288
+ Returns a dict with fields 'patches' (see extract_patch_from_image) and 'image_fn'.
289
+
290
+ If there is an error, the 'error' field will contain failure details, and 'patches'
291
+ will hold only the tiles generated before the failure (possibly an empty list, not None).
289
292
  """
290
293
 
291
294
  fn_abs = os.path.join(image_folder,fn_relative)
295
+ error = None
296
+ patches = []
292
297
 
293
298
  image_name = path_utils.clean_filename(fn_relative,char_limit=None,force_lower=True)
294
299
 
295
- # Open the image
296
- im = vis_utils.open_image(fn_abs)
297
- image_size = [im.width,im.height]
300
+ try:
301
+
302
+ # Open the image
303
+ im = vis_utils.open_image(fn_abs)
304
+ image_size = [im.width,im.height]
305
+
306
+ # Generate patch boundaries (a list of [x,y] starting points)
307
+ patch_boundaries = get_patch_boundaries(image_size,patch_size,patch_stride)
308
+
309
+ # Extract patches
310
+ #
311
+ # patch_xy = patch_boundaries[0]
312
+ for patch_xy in patch_boundaries:
298
313
 
299
- # Generate patch boundaries (a list of [x,y] starting points)
300
- patch_boundaries = get_patch_boundaries(image_size,patch_size,patch_stride)
301
-
302
- # Extract patches
303
- #
304
- # patch_xy = patch_boundaries[0]
305
- patches = []
306
-
307
- for patch_xy in patch_boundaries:
314
+ patch_info = extract_patch_from_image(im,patch_xy,patch_size,
315
+ patch_folder=tiling_folder,
316
+ image_name=image_name,
317
+ overwrite=overwrite)
318
+ patch_info['source_fn'] = fn_relative
319
+ patches.append(patch_info)
308
320
 
309
- patch_info = extract_patch_from_image(im,patch_xy,patch_size,
310
- patch_folder=tiling_folder,
311
- image_name=image_name,
312
- overwrite=overwrite)
313
- patch_info['source_fn'] = fn_relative
314
- patches.append(patch_info)
321
+ except Exception as e:
322
+
323
+ s = 'Patch generation error for {}: \n{}'.format(fn_relative,str(e))
324
+ print(s)
325
+ # patches = None
326
+ error = s
315
327
 
316
328
  image_patch_info = {}
317
329
  image_patch_info['patches'] = patches
318
330
  image_patch_info['image_fn'] = fn_relative
331
+ image_patch_info['error'] = error
319
332
 
320
333
  return image_patch_info
321
334
 
@@ -327,7 +340,8 @@ def run_tiled_inference(model_file, image_folder, tiling_folder, output_file,
327
340
  checkpoint_path=None, checkpoint_frequency=-1, remove_tiles=False,
328
341
  yolo_inference_options=None,
329
342
  n_patch_extraction_workers=default_n_patch_extraction_workers,
330
- overwrite_tiles=True):
343
+ overwrite_tiles=True,
344
+ image_list=None):
331
345
  """
332
346
  Run inference using [model_file] on the images in [image_folder], first splitting each image up
333
347
  into tiles of size [tile_size_x] x [tile_size_y], writing those tiles to [tiling_folder],
@@ -337,7 +351,8 @@ def run_tiled_inference(model_file, image_folder, tiling_folder, output_file,
337
351
  [tiling_folder] can be any folder, but this function reserves the right to do whatever it wants
338
352
  within that folder, including deleting everything, so it's best if it's a new folder.
339
353
  Conceptually this folder is temporary, it's just helpful in this case to not actually
340
- use the system temp folder, because the tile cache may be very large,
354
+ use the system temp folder, because the tile cache may be very large, so the caller may
355
+ want it to be on a specific drive.
341
356
 
342
357
  tile_overlap is the fraction of overlap between tiles.
343
358
 
@@ -346,25 +361,54 @@ def run_tiled_inference(model_file, image_folder, tiling_folder, output_file,
346
361
  if yolo_inference_options is supplied, it should be an instance of YoloInferenceOptions; in
347
362
  this case the model will be run with run_inference_with_yolov5_val. This is typically used to
348
363
  run the model with test-time augmentation.
349
- """
364
+ """
350
365
 
351
366
  ##%% Validate arguments
352
367
 
353
368
  assert tile_overlap < 1 and tile_overlap >= 0, \
354
369
  'Illegal tile overlap value {}'.format(tile_overlap)
355
370
 
371
+ if tile_size_x == -1:
372
+ tile_size_x = default_tile_size[0]
373
+ if tile_size_y == -1:
374
+ tile_size_y = default_tile_size[1]
375
+
356
376
  patch_size = [tile_size_x,tile_size_y]
357
377
  patch_stride = (round(patch_size[0]*(1.0-tile_overlap)),
358
378
  round(patch_size[1]*(1.0-tile_overlap)))
359
379
 
360
380
  os.makedirs(tiling_folder,exist_ok=True)
361
381
 
362
-
363
382
  ##%% List files
364
383
 
365
- image_files_relative = path_utils.find_images(image_folder, recursive=True, return_relative_paths=True)
366
- assert len(image_files_relative) > 0, 'No images found in folder {}'.format(image_folder)
367
-
384
+ if image_list is None:
385
+
386
+ print('Enumerating images in {}'.format(image_folder))
387
+ image_files_relative = path_utils.find_images(image_folder, recursive=True, return_relative_paths=True)
388
+ assert len(image_files_relative) > 0, 'No images found in folder {}'.format(image_folder)
389
+
390
+ else:
391
+
392
+ print('Loading image list from {}'.format(image_list))
393
+ with open(image_list,'r') as f:
394
+ image_files_relative = json.load(f)
395
+ n_absolute_paths = 0
396
+ for i_fn,fn in enumerate(image_files_relative):
397
+ if os.path.isabs(fn):
398
+ n_absolute_paths += 1
399
+ try:
400
+ fn_relative = os.path.relpath(fn,image_folder)
401
+ except ValueError:
402
+ 'Illegal absolute path supplied to run_tiled_inference, {} is outside of {}'.format(
403
+ fn,image_folder)
404
+ raise
405
+ assert not fn_relative.startswith('..'), \
406
+ 'Illegal absolute path supplied to run_tiled_inference, {} is outside of {}'.format(
407
+ fn,image_folder)
408
+ image_files_relative[i_fn] = fn_relative
409
+ if (n_absolute_paths != 0) and (n_absolute_paths != len(image_files_relative)):
410
+ raise ValueError('Illegal file list: converted {} of {} paths to relative'.format(
411
+ n_absolute_paths,len(image_files_relative)))
368
412
 
369
413
  ##%% Generate tiles
370
414
 
@@ -414,7 +458,7 @@ def run_tiled_inference(model_file, image_folder, tiling_folder, output_file,
414
458
  image_files_relative),total=len(image_files_relative)))
415
459
 
416
460
  # ...for each image
417
-
461
+
418
462
  # Write tile information to file; this is just a debugging convenience
419
463
  folder_name = path_utils.clean_filename(image_folder,force_lower=True)
420
464
  if folder_name.startswith('_'):
@@ -424,9 +468,16 @@ def run_tiled_inference(model_file, image_folder, tiling_folder, output_file,
424
468
  with open(tile_cache_file,'w') as f:
425
469
  json.dump(all_image_patch_info,f,indent=1)
426
470
 
471
+ # Keep track of patches that failed
472
+ images_with_patch_errors = {}
473
+ for patch_info in all_image_patch_info:
474
+ if patch_info['error'] is not None:
475
+ images_with_patch_errors[patch_info['image_fn']] = patch_info
476
+
427
477
 
428
478
  ##%% Run inference on tiles
429
479
 
480
+ # When running with run_inference_with_yolov5_val, we'll pass the folder
430
481
  if yolo_inference_options is not None:
431
482
 
432
483
  patch_level_output_file = os.path.join(tiling_folder,folder_name + '_patch_level_results.json')
@@ -444,11 +495,16 @@ def run_tiled_inference(model_file, image_folder, tiling_folder, output_file,
444
495
  run_inference_with_yolo_val(yolo_inference_options)
445
496
  with open(patch_level_output_file,'r') as f:
446
497
  patch_level_results = json.load(f)
447
-
498
+
499
+ # For standard inference, we'll pass a list of files
448
500
  else:
449
501
 
450
502
  patch_file_names = []
451
503
  for im in all_image_patch_info:
504
+ # If there was a patch generation error, don't run inference
505
+ if patch_info['error'] is not None:
506
+ assert im['image_fn'] in images_with_patch_errors
507
+ continue
452
508
  for patch in im['patches']:
453
509
  patch_file_names.append(patch['patch_fn'])
454
510
 
@@ -481,18 +537,44 @@ def run_tiled_inference(model_file, image_folder, tiling_folder, output_file,
481
537
  image_fn_relative_to_patch_info = { x['image_fn']:x for x in all_image_patch_info }
482
538
 
483
539
  # i_image = 0; image_fn_relative = image_files_relative[i_image]
484
- for i_image,image_fn_relative in tqdm(enumerate(image_files_relative),total=len(image_files_relative)):
540
+ for i_image,image_fn_relative in tqdm(enumerate(image_files_relative),
541
+ total=len(image_files_relative)):
485
542
 
486
543
  image_fn_abs = os.path.join(image_folder,image_fn_relative)
487
544
  assert os.path.isfile(image_fn_abs)
488
545
 
489
546
  output_im = {}
490
547
  output_im['file'] = image_fn_relative
491
- output_im['detections'] = []
548
+
549
+ # If we had a patch generation error
550
+ if image_fn_relative in images_with_patch_errors:
492
551
 
493
- pil_im = vis_utils.open_image(image_fn_abs)
494
- image_w = pil_im.size[0]
495
- image_h = pil_im.size[1]
552
+ patch_info = image_fn_relative_to_patch_info[image_fn_relative]
553
+ assert patch_info['error'] is not None
554
+
555
+ output_im['detections'] = None
556
+ output_im['failure'] = 'Patch generation error'
557
+ output_im['failure_details'] = patch_info['error']
558
+ image_level_results['images'].append(output_im)
559
+ continue
560
+
561
+ try:
562
+ pil_im = vis_utils.open_image(image_fn_abs)
563
+ image_w = pil_im.size[0]
564
+ image_h = pil_im.size[1]
565
+
566
+ # This would be a very unusual situation; we're reading back an image here that we already
567
+ # (successfully) read once during patch generation.
568
+ except Exception as e:
569
+ print('Warning: image read error after successful patch generation for {}:\n{}'.format(
570
+ image_fn_relative,str(e)))
571
+ output_im['detections'] = None
572
+ output_im['failure'] = 'Patch processing error'
573
+ output_im['failure_details'] = str(e)
574
+ image_level_results['images'].append(output_im)
575
+ continue
576
+
577
+ output_im['detections'] = []
496
578
 
497
579
  image_patch_info = image_fn_relative_to_patch_info[image_fn_relative]
498
580
  assert image_patch_info['patches'][0]['source_fn'] == image_fn_relative
@@ -520,6 +602,14 @@ def run_tiled_inference(model_file, image_folder, tiling_folder, output_file,
520
602
  assert patch_w == patch_size[0]
521
603
  assert patch_h == patch_size[1]
522
604
 
605
+ # If there was an inference failure on one patch, report the image
606
+ # as an inference failure
607
+ if 'detections' not in patch_results:
608
+ assert 'failure' in patch_results
609
+ output_im['detections'] = None
610
+ output_im['failure'] = patch_results['failure']
611
+ break
612
+
523
613
  # det = patch_results['detections'][0]
524
614
  for det in patch_results['detections']:
525
615
 
@@ -703,7 +793,7 @@ def main():
703
793
  help='Path to detector model file (.pb or .pt)')
704
794
  parser.add_argument(
705
795
  'image_folder',
706
- help='Folder containing images for inference (always recursive)')
796
+ help='Folder containing images for inference (always recursive, unless image_list is supplied)')
707
797
  parser.add_argument(
708
798
  'tiling_folder',
709
799
  help='Temporary folder where tiles and intermediate results will be stored')
@@ -729,6 +819,16 @@ def main():
729
819
  type=float,
730
820
  default=default_patch_overlap,
731
821
  help=('Overlap between tiles [0,1] (defaults to {})'.format(default_patch_overlap)))
822
+ parser.add_argument(
823
+ '--overwrite_handling',
824
+ type=str,
825
+ default='skip',
826
+ help=('Behavior when the target file exists (skip/overwrite/error) (default skip)'))
827
+ parser.add_argument(
828
+ '--image_list',
829
+ type=str,
830
+ default=None,
831
+ help=('A .json list of relative filenames (or absolute paths contained within image_folder) to include'))
732
832
 
733
833
  if len(sys.argv[1:]) == 0:
734
834
  parser.print_help()
@@ -736,19 +836,29 @@ def main():
736
836
 
737
837
  args = parser.parse_args()
738
838
 
739
- assert os.path.exists(args.model_file), \
839
+ model_file = try_download_known_detector(args.model_file)
840
+ assert os.path.exists(model_file), \
740
841
  'detector file {} does not exist'.format(args.model_file)
741
-
842
+
742
843
  if os.path.exists(args.output_file):
743
- print('Warning: output_file {} already exists and will be overwritten'.format(
744
- args.output_file))
844
+ if args.overwrite_handling == 'skip':
845
+ print('Warning: output file {} exists, skipping'.format(args.output_file))
846
+ return
847
+ elif args.overwrite_handling == 'overwrite':
848
+ print('Warning: output file {} exists, overwriting'.format(args.output_file))
849
+ elif args.overwrite_handling == 'error':
850
+ raise ValueError('Output file {} exists'.format(args.output_file))
851
+ else:
852
+ raise ValueError('Unknown output handling method {}'.format(args.overwrite_handling))
853
+
745
854
 
746
855
  remove_tiles = (not args.no_remove_tiles)
747
856
 
748
- run_tiled_inference(args.model_file, args.image_folder, args.tiling_folder, args.output_file,
857
+ run_tiled_inference(model_file, args.image_folder, args.tiling_folder, args.output_file,
749
858
  tile_size_x=args.tile_size_x, tile_size_y=args.tile_size_y,
750
859
  tile_overlap=args.tile_overlap,
751
- remove_tiles=remove_tiles)
860
+ remove_tiles=remove_tiles,
861
+ image_list=args.image_list)
752
862
 
753
863
  if __name__ == '__main__':
754
864
  main()
detection/tf_detector.py CHANGED
@@ -122,7 +122,8 @@ class TFDetector:
122
122
  detection_threshold: confidence above which to include the detection proposal
123
123
 
124
124
  Returns:
125
- A dict with the following fields, see the 'images' key in https://github.com/agentmorris/MegaDetector/tree/master/api/batch_processing#batch-processing-api-output-format
125
+ A dict with the following fields, see the 'images' key in:
126
+ https://github.com/agentmorris/MegaDetector/tree/master/api/batch_processing#batch-processing-api-output-format
126
127
  - 'file' (always present)
127
128
  - 'max_detection_conf'
128
129
  - 'detections', which is a list of detection objects containing keys 'category', 'conf' and 'bbox'
detection/video_utils.py CHANGED
@@ -24,6 +24,8 @@ from md_utils import path_utils
24
24
 
25
25
  from md_visualization import visualization_utils as vis_utils
26
26
 
27
+ default_fourcc = 'h264'
28
+
27
29
 
28
30
  #%% Path utilities
29
31
 
@@ -76,7 +78,7 @@ def find_videos(dirname: str, recursive: bool = False,
76
78
 
77
79
  # http://tsaith.github.io/combine-images-into-a-video-with-python-3-and-opencv-3.html
78
80
 
79
- def frames_to_video(images, Fs, output_file_name, codec_spec='h264'):
81
+ def frames_to_video(images, Fs, output_file_name, codec_spec=default_fourcc):
80
82
  """
81
83
  Given a list of image files and a sample rate, concatenate those images into
82
84
  a video and write to [output_file_name].
@@ -308,7 +310,7 @@ def video_folder_to_frames(input_folder:str, output_folder_base:str,
308
310
 
309
311
  class FrameToVideoOptions:
310
312
 
311
- # zero-indexed
313
+ # One-indexed, i.e. "1" means "use the confidence value from the highest-confidence frame"
312
314
  nth_highest_confidence = 1
313
315
 
314
316
  # 'error' or 'skip_with_warning'
md_utils/ct_utils.py CHANGED
@@ -39,9 +39,13 @@ def truncate_float_array(xs, precision=3):
39
39
 
40
40
  def truncate_float(x, precision=3):
41
41
  """
42
- Truncates a floating-point value to a specific number of significant digits.
42
+ Truncates the fractional portion of a floating-point value to a specific number of
43
+ floating-point digits.
43
44
 
44
- For example: truncate_float(0.0003214884) --> 0.000321
45
+ For example:
46
+
47
+ truncate_float(0.0003214884) --> 0.000321
48
+ truncate_float(1.0003214884) --> 1.000321
45
49
 
46
50
  This function is primarily used to achieve a certain float representation
47
51
  before exporting to JSON.
@@ -58,13 +62,18 @@ def truncate_float(x, precision=3):
58
62
 
59
63
  return 0
60
64
 
65
+ elif (x > 1):
66
+
67
+ fractional_component = x - 1.0
68
+ return 1 + truncate_float(fractional_component)
69
+
61
70
  else:
62
71
 
63
72
  # Determine the factor, which shifts the decimal point of x
64
73
  # just behind the last significant digit.
65
74
  factor = math.pow(10, precision - 1 - math.floor(math.log10(abs(x))))
66
75
 
67
- # Shift decimal point by multiplicatipon with factor, flooring, and
76
+ # Shift decimal point by multiplication with factor, flooring, and
68
77
  # division by factor.
69
78
  return math.floor(x * factor)/factor
70
79
 
@@ -174,6 +183,7 @@ def convert_xywh_to_xyxy(api_bbox):
174
183
  Converts an xywh bounding box to an xyxy bounding box.
175
184
 
176
185
  Note that this is also different from the TensorFlow Object Detection API coords format.
186
+
177
187
  Args:
178
188
  api_bbox: bbox output by the batch processing API [x_min, y_min, width_of_box, height_of_box]
179
189
 
@@ -266,6 +276,7 @@ def point_dist(p1,p2):
266
276
  """
267
277
  Distance between two points, represented as length-two tuples.
268
278
  """
279
+
269
280
  return math.sqrt( ((p1[0]-p2[0])**2) + ((p1[1]-p2[1])**2) )
270
281
 
271
282
 
@@ -328,23 +339,107 @@ def split_list_into_fixed_size_chunks(L,n):
328
339
  return [L[i * n:(i + 1) * n] for i in range((len(L) + n - 1) // n )]
329
340
 
330
341
 
331
- def split_list_into_n_chunks(L, n):
342
+ def split_list_into_n_chunks(L, n, chunk_strategy='greedy'):
332
343
  """
333
344
  Splits the list or tuple L into n equally-sized chunks (some chunks may be one
334
345
  element smaller than others, i.e. len(L) does not have to be a multiple of n).
346
+
347
+ chunk_strategy can be "greedy" (default, if there are k samples per chunk, the first
348
+ k go into the first chunk) or "balanced" (alternate between chunks when pulling
349
+ items from the list).
350
+ """
351
+
352
+ if chunk_strategy == 'greedy':
353
+ k, m = divmod(len(L), n)
354
+ return list(L[i*k+min(i, m):(i+1)*k+min(i+1, m)] for i in range(n))
355
+ elif chunk_strategy == 'balanced':
356
+ chunks = [ [] for _ in range(n) ]
357
+ for i_item,item in enumerate(L):
358
+ i_chunk = i_item % n
359
+ chunks[i_chunk].append(item)
360
+ return chunks
361
+ else:
362
+ raise ValueError('Invalid chunk strategy: {}'.format(chunk_strategy))
363
+
364
+
365
+ def sort_dictionary_by_key(d,reverse=False):
366
+ """
367
+ Sorts the dictionary [d] by key.
335
368
  """
336
369
 
337
- k, m = divmod(len(L), n)
338
- return list(L[i*k+min(i, m):(i+1)*k+min(i+1, m)] for i in range(n))
370
+ d = dict(sorted(d.items(),reverse=reverse))
371
+ return d
372
+
339
373
 
374
+ def sort_dictionary_by_value(d,sort_values=None,reverse=False):
375
+ """
376
+ Sorts the dictionary [d] by value. If sort_values is None, uses d.values(),
377
+ otherwise uses the dictionary sort_values as the sorting criterion.
378
+ """
379
+
380
+ if sort_values is None:
381
+ d = {k: v for k, v in sorted(d.items(), key=lambda item: item[1], reverse=reverse)}
382
+ else:
383
+ d = {k: v for k, v in sorted(d.items(), key=lambda item: sort_values[item[0]], reverse=reverse)}
384
+ return d
340
385
 
341
386
 
387
+ def invert_dictionary(d):
388
+ """
389
+ Create a new dictionary that maps d.values() to d.keys(). Does not check
390
+ uniqueness.
391
+ """
392
+
393
+ return {v: k for k, v in d.items()}
394
+
395
+
396
+ def image_file_to_camera_folder(image_fn):
397
+ """
398
+ Remove common overflow folders (e.g. 100RECNX, 101RECNX) from paths, i.e. turn:
399
+
400
+ a\b\c\100RECNX\image001.jpg
401
+
402
+ ...into:
403
+
404
+ a/b/c
405
+
406
+ Returns the same thing as os.path.dirname() (i.e., just the folder name) if no overflow folders are
407
+ present.
408
+
409
+ Always converts backslashes to slashes.
410
+ """
411
+
412
+ import re
413
+
414
+ # 100RECNX is the overflow folder style for Reconyx cameras
415
+ # 100EK113 is (for some reason) the overflow folder style for Bushnell cameras
416
+ # 100_BTCF is the overflow folder style for Browning cameras
417
+ # 100MEDIA is the overflow folder style used on a number of consumer-grade cameras
418
+ patterns = ['\/\d+RECNX\/','\/\d+EK\d+\/','\/\d+_BTCF\/','\/\d+MEDIA\/']
419
+
420
+ image_fn = image_fn.replace('\\','/')
421
+ for pat in patterns:
422
+ image_fn = re.sub(pat,'/',image_fn)
423
+ camera_folder = os.path.dirname(image_fn)
424
+
425
+ return camera_folder
426
+
427
+
342
428
  #%% Test drivers
343
429
 
344
430
  if False:
345
431
 
346
432
  pass
347
433
 
434
+ #%% Test image_file_to_camera_folder()
435
+
436
+ relative_path = 'a/b/c/d/100EK113/blah.jpg'
437
+ print(image_file_to_camera_folder(relative_path))
438
+
439
+ relative_path = 'a/b/c/d/100RECNX/blah.jpg'
440
+ print(image_file_to_camera_folder(relative_path))
441
+
442
+
348
443
  #%% Test a few rectangle distances
349
444
 
350
445
  r1 = [0,0,1,1]; r2 = [0,0,1,1]; assert rect_distance(r1,r2)==0