megadetector 5.0.6__py3-none-any.whl → 5.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (62)
  1. api/batch_processing/data_preparation/manage_local_batch.py +278 -197
  2. api/batch_processing/data_preparation/manage_video_batch.py +7 -2
  3. api/batch_processing/postprocessing/add_max_conf.py +1 -0
  4. api/batch_processing/postprocessing/compare_batch_results.py +110 -60
  5. api/batch_processing/postprocessing/load_api_results.py +55 -69
  6. api/batch_processing/postprocessing/md_to_labelme.py +1 -0
  7. api/batch_processing/postprocessing/postprocess_batch_results.py +158 -50
  8. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +625 -0
  9. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +71 -23
  10. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +1 -1
  11. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +222 -74
  12. api/batch_processing/postprocessing/subset_json_detector_output.py +132 -5
  13. api/batch_processing/postprocessing/top_folders_to_bottom.py +1 -1
  14. classification/prepare_classification_script.py +191 -191
  15. data_management/coco_to_yolo.py +65 -44
  16. data_management/databases/integrity_check_json_db.py +7 -5
  17. data_management/generate_crops_from_cct.py +1 -1
  18. data_management/importers/animl_results_to_md_results.py +2 -2
  19. data_management/importers/noaa_seals_2019.py +1 -1
  20. data_management/importers/zamba_results_to_md_results.py +2 -2
  21. data_management/labelme_to_coco.py +34 -6
  22. data_management/labelme_to_yolo.py +1 -1
  23. data_management/lila/create_lila_blank_set.py +474 -0
  24. data_management/lila/create_lila_test_set.py +2 -1
  25. data_management/lila/create_links_to_md_results_files.py +1 -1
  26. data_management/lila/download_lila_subset.py +46 -21
  27. data_management/lila/generate_lila_per_image_labels.py +23 -14
  28. data_management/lila/get_lila_annotation_counts.py +16 -10
  29. data_management/lila/lila_common.py +14 -11
  30. data_management/lila/test_lila_metadata_urls.py +116 -0
  31. data_management/resize_coco_dataset.py +12 -10
  32. data_management/yolo_output_to_md_output.py +40 -13
  33. data_management/yolo_to_coco.py +34 -21
  34. detection/process_video.py +36 -14
  35. detection/pytorch_detector.py +1 -1
  36. detection/run_detector.py +73 -18
  37. detection/run_detector_batch.py +104 -24
  38. detection/run_inference_with_yolov5_val.py +127 -26
  39. detection/run_tiled_inference.py +153 -43
  40. detection/video_utils.py +3 -1
  41. md_utils/ct_utils.py +79 -3
  42. md_utils/md_tests.py +253 -15
  43. md_utils/path_utils.py +129 -24
  44. md_utils/process_utils.py +26 -7
  45. md_utils/split_locations_into_train_val.py +215 -0
  46. md_utils/string_utils.py +10 -0
  47. md_utils/url_utils.py +0 -2
  48. md_utils/write_html_image_list.py +1 -0
  49. md_visualization/visualization_utils.py +17 -2
  50. md_visualization/visualize_db.py +8 -0
  51. md_visualization/visualize_detector_output.py +185 -104
  52. {megadetector-5.0.6.dist-info → megadetector-5.0.7.dist-info}/METADATA +2 -2
  53. {megadetector-5.0.6.dist-info → megadetector-5.0.7.dist-info}/RECORD +62 -58
  54. {megadetector-5.0.6.dist-info → megadetector-5.0.7.dist-info}/WHEEL +1 -1
  55. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +1 -1
  56. taxonomy_mapping/map_new_lila_datasets.py +43 -39
  57. taxonomy_mapping/prepare_lila_taxonomy_release.py +5 -2
  58. taxonomy_mapping/preview_lila_taxonomy.py +27 -27
  59. taxonomy_mapping/species_lookup.py +33 -13
  60. taxonomy_mapping/taxonomy_csv_checker.py +7 -5
  61. {megadetector-5.0.6.dist-info → megadetector-5.0.7.dist-info}/LICENSE +0 -0
  62. {megadetector-5.0.6.dist-info → megadetector-5.0.7.dist-info}/top_level.txt +0 -0
md_utils/ct_utils.py CHANGED
@@ -266,6 +266,7 @@ def point_dist(p1,p2):
266
266
  """
267
267
  Distance between two points, represented as length-two tuples.
268
268
  """
269
+
269
270
  return math.sqrt( ((p1[0]-p2[0])**2) + ((p1[1]-p2[1])**2) )
270
271
 
271
272
 
@@ -328,16 +329,82 @@ def split_list_into_fixed_size_chunks(L,n):
328
329
  return [L[i * n:(i + 1) * n] for i in range((len(L) + n - 1) // n )]
329
330
 
330
331
 
331
- def split_list_into_n_chunks(L, n):
332
+ def split_list_into_n_chunks(L, n, chunk_strategy='greedy'):
332
333
  """
333
334
  Splits the list or tuple L into n equally-sized chunks (some chunks may be one
334
335
  element smaller than others, i.e. len(L) does not have to be a multiple of n.
336
+
337
+ chunk_strategy can be "greedy" (default, if there are k samples per chunk, the first
338
+ k go into the first chunk) or "balanced" (alternate between chunks when pulling
339
+ items from the list).
340
+ """
341
+
342
+ if chunk_strategy == 'greedy':
343
+ k, m = divmod(len(L), n)
344
+ return list(L[i*k+min(i, m):(i+1)*k+min(i+1, m)] for i in range(n))
345
+ elif chunk_strategy == 'balanced':
346
+ chunks = [ [] for _ in range(n) ]
347
+ for i_item,item in enumerate(L):
348
+ i_chunk = i_item % n
349
+ chunks[i_chunk].append(item)
350
+ return chunks
351
+ else:
352
+ raise ValueError('Invalid chunk strategy: {}'.format(chunk_strategy))
353
+
354
+
355
+ def sort_dictionary_by_value(d,sort_values=None,reverse=False):
356
+ """
357
+ Sorts the dictionary [d] by value. If sort_values is None, uses d.values(),
358
+ otherwise uses the dictionary sort_values as the sorting criterion.
359
+ """
360
+
361
+ if sort_values is None:
362
+ d = {k: v for k, v in sorted(d.items(), key=lambda item: item[1], reverse=reverse)}
363
+ else:
364
+ d = {k: v for k, v in sorted(d.items(), key=lambda item: sort_values[item[0]], reverse=reverse)}
365
+ return d
366
+
367
+
368
+ def invert_dictionary(d):
369
+ """
370
+ Create a new dictionary that maps d.values() to d.keys(). Does not check
371
+ uniqueness.
372
+ """
373
+
374
+ return {v: k for k, v in d.items()}
375
+
376
+
377
+ def image_file_to_camera_folder(image_fn):
335
378
  """
379
+ Remove common overflow folders (e.g. RECNX101, RECNX102) from paths, i.e. turn:
380
+
381
+ a\b\c\RECNX101\image001.jpg
336
382
 
337
- k, m = divmod(len(L), n)
338
- return list(L[i*k+min(i, m):(i+1)*k+min(i+1, m)] for i in range(n))
383
+ ...into:
384
+
385
+ a\b\c
339
386
 
387
+ Returns the same thing as os.dirname() (i.e., just the folder name) if no overflow folders are
388
+ present.
340
389
 
390
+ Always converts backslashes to slashes.
391
+ """
392
+
393
+ import re
394
+
395
+ # 100RECNX is the overflow folder style for Reconyx cameras
396
+ # 100EK113 is (for some reason) the overflow folder style for Bushnell cameras
397
+ # 100_BTCF is the overflow folder style for Browning cameras
398
+ # 100MEDIA is the overflow folder style used on a number of consumer-grade cameras
399
+ patterns = ['\/\d+RECNX\/','\/\d+EK\d+\/','\/\d+_BTCF\/','\/\d+MEDIA\/']
400
+
401
+ image_fn = image_fn.replace('\\','/')
402
+ for pat in patterns:
403
+ image_fn = re.sub(pat,'/',image_fn)
404
+ camera_folder = os.path.dirname(image_fn)
405
+
406
+ return camera_folder
407
+
341
408
 
342
409
  #%% Test drivers
343
410
 
@@ -345,6 +412,15 @@ if False:
345
412
 
346
413
  pass
347
414
 
415
+ #%% Test image_file_to_camera_folder()
416
+
417
+ relative_path = 'a/b/c/d/100EK113/blah.jpg'
418
+ print(image_file_to_camera_folder(relative_path))
419
+
420
+ relative_path = 'a/b/c/d/100RECNX/blah.jpg'
421
+ print(image_file_to_camera_folder(relative_path))
422
+
423
+
348
424
  #%% Test a few rectangle distances
349
425
 
350
426
  r1 = [0,0,1,1]; r2 = [0,0,1,1]; assert rect_distance(r1,r2)==0
md_utils/md_tests.py CHANGED
@@ -1,24 +1,17 @@
1
1
  ########
2
2
  #
3
- # md-tests.py
3
+ # md_tests.py
4
4
  #
5
5
  # A series of tests to validate basic repo functionality and verify either "correct"
6
6
  # inference behavior, or - when operating in environments other than the training
7
7
  # environment - acceptable deviation from the correct results.
8
8
  #
9
- # This module should not depend on anything else in this repo outside of the,
10
- # tests themselves, even if it means some duplicated code (e.g. for downloading files), since
11
- # much of what it tries to test is, e.g., imports.
9
+ # This module should not depend on anything else in this repo outside of the
10
+ # tests themselves, even if it means some duplicated code (e.g. for downloading files),
11
+ # since much of what it tries to test is, e.g., imports.
12
12
  #
13
13
  ########
14
14
 
15
- #%% TODO
16
-
17
- # Video tests
18
- # Augmented inference tests
19
- # Checkpoint tests
20
-
21
-
22
15
  #%% Imports and constants
23
16
 
24
17
  ### Only standard imports belong here, not MD-specific imports ###
@@ -54,6 +47,7 @@ class MDTestOptions:
54
47
  max_coord_error = 0.001
55
48
  max_conf_error = 0.005
56
49
  cli_working_dir = None
50
+ yolo_working_folder = None
57
51
 
58
52
 
59
53
  #%% Support functions
@@ -119,9 +113,9 @@ def download_test_data(options):
119
113
  if download_zipfile:
120
114
  print('Downloading test data zipfile')
121
115
  urllib.request.urlretrieve(options.test_data_url, local_zipfile)
122
- print('Finished download')
116
+ print('Finished download to {}'.format(local_zipfile))
123
117
  else:
124
- print('Bypassing test data zipfile download')
118
+ print('Bypassing test data zipfile download for {}'.format(local_zipfile))
125
119
 
126
120
 
127
121
  ## Unzip data
@@ -164,6 +158,7 @@ def download_test_data(options):
164
158
  options.all_test_files = test_files
165
159
  options.test_images = [fn for fn in test_files if os.path.splitext(fn.lower())[1] in ('.jpg','.jpeg','.png')]
166
160
  options.test_videos = [fn for fn in test_files if os.path.splitext(fn.lower())[1] in ('.mp4','.avi')]
161
+ options.test_videos = [fn for fn in options.test_videos if 'rendered' not in fn]
167
162
 
168
163
  # ...def download_test_data(...)
169
164
 
@@ -247,6 +242,8 @@ def execute_and_print(cmd,print_output=True):
247
242
 
248
243
  def run_python_tests(options):
249
244
 
245
+ print('\n*** Starting module tests ***\n')
246
+
250
247
  ## Prepare data
251
248
 
252
249
  download_test_data(options)
@@ -393,7 +390,84 @@ def run_python_tests(options):
393
390
  assert os.path.isfile(rde_results.filterFile),\
394
391
  'Could not find RDE output file {}'.format(rde_results.filterFile)
395
392
 
396
- print('Finished running Python tests')
393
+
394
+ # TODO: add remove_repeat_detections test here
395
+ #
396
+ # It's already tested in the CLI tests, so this is not urgent.
397
+
398
+
399
+ ## Video test (single video)
400
+
401
+ from detection.process_video import ProcessVideoOptions, process_video
402
+
403
+ video_options = ProcessVideoOptions()
404
+ video_options.model_file = 'MDV5A'
405
+ video_options.input_video_file = os.path.join(options.scratch_dir,options.test_videos[0])
406
+ video_options.output_json_file = os.path.join(options.scratch_dir,'single_video_output.json')
407
+ video_options.output_video_file = os.path.join(options.scratch_dir,'video_scratch/rendered_video.mp4')
408
+ video_options.frame_folder = os.path.join(options.scratch_dir,'video_scratch/frame_folder')
409
+ video_options.frame_rendering_folder = os.path.join(options.scratch_dir,'video_scratch/rendered_frame_folder')
410
+ video_options.render_output_video = True
411
+ # video_options.keep_rendered_frames = False
412
+ # video_options.keep_rendered_frames = False
413
+ video_options.force_extracted_frame_folder_deletion = True
414
+ video_options.force_rendered_frame_folder_deletion = True
415
+ # video_options.reuse_results_if_available = False
416
+ # video_options.reuse_frames_if_available = False
417
+ video_options.recursive = True
418
+ video_options.verbose = False
419
+ video_options.fourcc = 'mp4v'
420
+ # video_options.rendering_confidence_threshold = None
421
+ # video_options.json_confidence_threshold = 0.005
422
+ video_options.frame_sample = 5
423
+ video_options.n_cores = 5
424
+ # video_options.debug_max_frames = -1
425
+ # video_options.class_mapping_filename = None
426
+
427
+ _ = process_video(video_options)
428
+
429
+ assert os.path.isfile(video_options.output_video_file), \
430
+ 'Python video test failed to render output video file'
431
+ assert os.path.isfile(video_options.output_json_file), \
432
+ 'Python video test failed to render output .json file'
433
+
434
+
435
+ ## Video test (folder)
436
+
437
+ from detection.process_video import ProcessVideoOptions, process_video_folder
438
+
439
+ video_options = ProcessVideoOptions()
440
+ video_options.model_file = 'MDV5A'
441
+ video_options.input_video_file = os.path.join(options.scratch_dir,
442
+ os.path.dirname(options.test_videos[0]))
443
+ video_options.output_json_file = os.path.join(options.scratch_dir,'video_folder_output.json')
444
+ # video_options.output_video_file = None
445
+ video_options.frame_folder = os.path.join(options.scratch_dir,'video_scratch/frame_folder')
446
+ video_options.frame_rendering_folder = os.path.join(options.scratch_dir,'video_scratch/rendered_frame_folder')
447
+ video_options.render_output_video = False
448
+ # video_options.keep_rendered_frames = False
449
+ # video_options.keep_rendered_frames = False
450
+ video_options.force_extracted_frame_folder_deletion = True
451
+ video_options.force_rendered_frame_folder_deletion = True
452
+ # video_options.reuse_results_if_available = False
453
+ # video_options.reuse_frames_if_available = False
454
+ video_options.recursive = True
455
+ video_options.verbose = False
456
+ # video_options.fourcc = None
457
+ # video_options.rendering_confidence_threshold = None
458
+ # video_options.json_confidence_threshold = 0.005
459
+ video_options.frame_sample = 5
460
+ video_options.n_cores = 5
461
+ # video_options.debug_max_frames = -1
462
+ # video_options.class_mapping_filename = None
463
+
464
+ _ = process_video_folder(video_options)
465
+
466
+ assert os.path.isfile(video_options.output_json_file), \
467
+ 'Python video test failed to render output .json file'
468
+
469
+
470
+ print('\n*** Finished module tests ***\n')
397
471
 
398
472
  # ...def run_python_tests(...)
399
473
 
@@ -402,6 +476,8 @@ def run_python_tests(options):
402
476
 
403
477
  def run_cli_tests(options):
404
478
 
479
+ print('\n*** Starting CLI tests ***\n')
480
+
405
481
  ## chdir if necessary
406
482
 
407
483
  if options.cli_working_dir is not None:
@@ -473,6 +549,158 @@ def run_cli_tests(options):
473
549
  print('Running: {}'.format(cmd))
474
550
  cmd_results = execute_and_print(cmd)
475
551
 
552
+
553
+ ## RDE
554
+
555
+ rde_output_dir = os.path.join(options.scratch_dir,'rde_output_cli')
556
+
557
+ if options.cli_working_dir is None:
558
+ cmd = 'python -m api.batch_processing.postprocessing.repeat_detection_elimination.find_repeat_detections'
559
+ else:
560
+ cmd = 'python api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py'
561
+ cmd += ' {}'.format(inference_output_file)
562
+ cmd += ' --imageBase {}'.format(image_folder)
563
+ cmd += ' --outputBase {}'.format(rde_output_dir)
564
+ cmd += ' --occurrenceThreshold 1' # Use an absurd number here to make sure we get some suspicious detections
565
+ print('Running: {}'.format(cmd))
566
+ cmd_results = execute_and_print(cmd)
567
+
568
+ # Find the latest filtering folder
569
+ filtering_output_dir = os.listdir(rde_output_dir)
570
+ filtering_output_dir = [fn for fn in filtering_output_dir if fn.startswith('filtering_')]
571
+ filtering_output_dir = [os.path.join(rde_output_dir,fn) for fn in filtering_output_dir]
572
+ filtering_output_dir = [fn for fn in filtering_output_dir if os.path.isdir(fn)]
573
+ filtering_output_dir = sorted(filtering_output_dir)[-1]
574
+
575
+ print('Using RDE filtering folder {}'.format(filtering_output_dir))
576
+
577
+ filtered_output_file = inference_output_file.replace('.json','_filtered.json')
578
+
579
+ if options.cli_working_dir is None:
580
+ cmd = 'python -m api.batch_processing.postprocessing.repeat_detection_elimination.remove_repeat_detections'
581
+ else:
582
+ cmd = 'python api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py'
583
+ cmd += ' {} {} {}'.format(inference_output_file,filtered_output_file,filtering_output_dir)
584
+ print('Running: {}'.format(cmd))
585
+ cmd_results = execute_and_print(cmd)
586
+
587
+ assert os.path.isfile(filtered_output_file), \
588
+ 'Could not find RDE output file {}'.format(filtered_output_file)
589
+
590
+
591
+ ## Run inference on a folder (tiled)
592
+
593
+ image_folder = os.path.join(options.scratch_dir,'md-test-images')
594
+ tiling_folder = os.path.join(options.scratch_dir,'tiling-folder')
595
+ inference_output_file_tiled = os.path.join(options.scratch_dir,'folder_inference_output_tiled.json')
596
+ if options.cli_working_dir is None:
597
+ cmd = 'python -m detection.run_tiled_inference'
598
+ else:
599
+ cmd = 'python detection/run_tiled_inference.py'
600
+ cmd += ' {} {} {} {}'.format(
601
+ model_file,image_folder,tiling_folder,inference_output_file_tiled)
602
+ cmd += ' --overwrite_handling overwrite'
603
+ print('Running: {}'.format(cmd))
604
+ cmd_results = execute_and_print(cmd)
605
+
606
+ with open(inference_output_file_tiled,'r') as f:
607
+ results_from_file = json.load(f) # noqa
608
+
609
+
610
+ ## Run inference on a folder (augmented)
611
+
612
+ if options.yolo_working_folder is None:
613
+
614
+ print('Bypassing YOLOv5 val tests, no yolo folder supplied')
615
+
616
+ else:
617
+
618
+ image_folder = os.path.join(options.scratch_dir,'md-test-images')
619
+ yolo_results_folder = os.path.join(options.scratch_dir,'yolo-output-folder')
620
+ yolo_symlink_folder = os.path.join(options.scratch_dir,'yolo-symlink_folder')
621
+ inference_output_file_yolo_val = os.path.join(options.scratch_dir,'folder_inference_output_yolo_val.json')
622
+ if options.cli_working_dir is None:
623
+ cmd = 'python -m detection.run_inference_with_yolov5_val'
624
+ else:
625
+ cmd = 'python detection/run_inference_with_yolov5_val.py'
626
+ cmd += ' {} {} {}'.format(
627
+ model_file,image_folder,inference_output_file_yolo_val)
628
+ cmd += ' --yolo_working_folder {}'.format(options.yolo_working_folder)
629
+ cmd += ' --yolo_results_folder {}'.format(yolo_results_folder)
630
+ cmd += ' --symlink_folder {}'.format(yolo_symlink_folder)
631
+ cmd += ' --augment_enabled 1'
632
+ # cmd += ' --no_use_symlinks'
633
+ cmd += ' --overwrite_handling overwrite'
634
+ print('Running: {}'.format(cmd))
635
+ cmd_results = execute_and_print(cmd)
636
+
637
+ with open(inference_output_file_yolo_val,'r') as f:
638
+ results_from_file = json.load(f) # noqa
639
+
640
+
641
+ ## Video test
642
+
643
+ model_file = 'MDV5A'
644
+ video_inference_output_file = os.path.join(options.scratch_dir,'video_inference_output.json')
645
+ output_video_file = os.path.join(options.scratch_dir,'video_scratch/cli_rendered_video.mp4')
646
+ frame_folder = os.path.join(options.scratch_dir,'video_scratch/frame_folder_cli')
647
+ frame_rendering_folder = os.path.join(options.scratch_dir,'video_scratch/rendered_frame_folder_cli')
648
+
649
+ video_fn = os.path.join(options.scratch_dir,options.test_videos[-1])
650
+ output_dir = os.path.join(options.scratch_dir,'single_video_test_cli')
651
+ if options.cli_working_dir is None:
652
+ cmd = 'python -m detection.process_video'
653
+ else:
654
+ cmd = 'python detection/process_video.py'
655
+ cmd += ' {} {}'.format(model_file,video_fn)
656
+ cmd += ' --frame_folder {} --frame_rendering_folder {} --output_json_file {} --output_video_file {}'.format(
657
+ frame_folder,frame_rendering_folder,video_inference_output_file,output_video_file)
658
+ cmd += ' --render_output_video --fourcc mp4v'
659
+ cmd += ' --force_extracted_frame_folder_deletion --force_rendered_frame_folder_deletion --n_cores 5 --frame_sample 3'
660
+ print('Running: {}'.format(cmd))
661
+ cmd_results = execute_and_print(cmd)
662
+
663
+
664
+ ## Run inference on a folder (again, so we can do a comparison)
665
+
666
+ image_folder = os.path.join(options.scratch_dir,'md-test-images')
667
+ model_file = 'MDV5B'
668
+ inference_output_file_alt = os.path.join(options.scratch_dir,'folder_inference_output_alt.json')
669
+ if options.cli_working_dir is None:
670
+ cmd = 'python -m detection.run_detector_batch'
671
+ else:
672
+ cmd = 'python detection/run_detector_batch.py'
673
+ cmd += ' {} {} {} --recursive'.format(
674
+ model_file,image_folder,inference_output_file_alt)
675
+ cmd += ' --output_relative_filenames --quiet --include_image_size'
676
+ cmd += ' --include_image_timestamp --include_exif_data'
677
+ print('Running: {}'.format(cmd))
678
+ cmd_results = execute_and_print(cmd)
679
+
680
+ with open(inference_output_file_alt,'r') as f:
681
+ results_from_file = json.load(f) # noqa
682
+
683
+
684
+ ## Compare the two files
685
+
686
+ comparison_output_folder = os.path.join(options.scratch_dir,'results_comparison')
687
+ image_folder = os.path.join(options.scratch_dir,'md-test-images')
688
+ results_files_string = '"{}" "{}"'.format(
689
+ inference_output_file,inference_output_file_alt)
690
+ if options.cli_working_dir is None:
691
+ cmd = 'python -m api.batch_processing.postprocessing.compare_batch_results'
692
+ else:
693
+ cmd = 'python api/batch_processing/postprocessing/compare_batch_results.py'
694
+ cmd += ' {} {} {}'.format(comparison_output_folder,image_folder,results_files_string)
695
+ print('Running: {}'.format(cmd))
696
+ cmd_results = execute_and_print(cmd)
697
+
698
+ assert cmd_results['status'] == 0, 'Error generating comparison HTML'
699
+ assert os.path.isfile(os.path.join(comparison_output_folder,'index.html')), \
700
+ 'Failed to generate comparison HTML'
701
+
702
+ print('\n*** Finished CLI tests ***\n')
703
+
476
704
  # ...def run_cli_tests(...)
477
705
 
478
706
 
@@ -518,9 +746,19 @@ if False:
518
746
 
519
747
  options.disable_gpu = False
520
748
  options.cpu_execution_is_error = False
521
- options.disable_video_tests = False
749
+ options.skip_video_tests = False
750
+ options.skip_python_tests = False
751
+ options.skip_cli_tests = False
522
752
  options.scratch_dir = None
753
+ options.test_data_url = 'https://lila.science/public/md-test-package.zip'
754
+ options.force_data_download = False
755
+ options.force_data_unzip = False
756
+ options.warning_mode = True
757
+ options.test_image_subdir = 'md-test-images'
758
+ options.max_coord_error = 0.001
759
+ options.max_conf_error = 0.005
523
760
  options.cli_working_dir = r'c:\git\MegaDetector'
761
+ options.yolo_working_folder = r'c:\git\yolov5'
524
762
 
525
763
 
526
764
  #%%
md_utils/path_utils.py CHANGED
@@ -21,7 +21,8 @@ import zipfile
21
21
  from zipfile import ZipFile
22
22
  from datetime import datetime
23
23
  from typing import Container, Iterable, List, Optional, Tuple, Sequence
24
- from multiprocessing.pool import ThreadPool
24
+ from multiprocessing.pool import Pool, ThreadPool
25
+ from functools import partial
25
26
  from tqdm import tqdm
26
27
 
27
28
  IMG_EXTENSIONS = ('.jpg', '.jpeg', '.gif', '.png', '.tif', '.tiff', '.bmp')
@@ -34,31 +35,51 @@ CHAR_LIMIT = 255
34
35
 
35
36
  #%% General path functions
36
37
 
37
- def recursive_file_list(base_dir, convert_slashes=True, return_relative_paths=False):
38
- """
38
+ def recursive_file_list(base_dir, convert_slashes=True,
39
+ return_relative_paths=False, sort_files=True,
40
+ recursive=True):
41
+ r"""
39
42
  Enumerate files (not directories) in [base_dir], optionally converting
40
43
  \ to /
41
44
  """
42
45
 
43
46
  all_files = []
44
47
 
45
- for root, _, filenames in os.walk(base_dir):
46
- for filename in filenames:
47
- full_path = os.path.join(root, filename)
48
- all_files.append(full_path)
49
-
48
+ if recursive:
49
+ for root, _, filenames in os.walk(base_dir):
50
+ for filename in filenames:
51
+ full_path = os.path.join(root, filename)
52
+ all_files.append(full_path)
53
+ else:
54
+ all_files_relative = os.listdir(base_dir)
55
+ all_files = [os.path.join(base_dir,fn) for fn in all_files_relative]
56
+ all_files = [fn for fn in all_files if os.path.isfile(fn)]
57
+
50
58
  if return_relative_paths:
51
59
  all_files = [os.path.relpath(fn,base_dir) for fn in all_files]
52
60
 
53
61
  if convert_slashes:
54
62
  all_files = [fn.replace('\\', '/') for fn in all_files]
63
+
64
+ if sort_files:
65
+ all_files = sorted(all_files)
55
66
 
56
- all_files = sorted(all_files)
57
67
  return all_files
58
68
 
59
69
 
60
- def split_path(path: str) -> List[str]:
70
+ def file_list(base_dir, convert_slashes=True, return_relative_paths=False, sort_files=True,
71
+ recursive=False):
61
72
  """
73
+ Trivial wrapper for recursive_file_list, which was a poor function name choice at the time,
74
+ it doesn't really make sense to have a "recursive" option in a function called "recursive_file_list".
75
+ """
76
+
77
+ return recursive_file_list(base_dir,convert_slashes,return_relative_paths,sort_files,
78
+ recursive=recursive)
79
+
80
+
81
+ def split_path(path: str) -> List[str]:
82
+ r"""
62
83
  Splits [path] into all its constituent tokens.
63
84
 
64
85
  Non-recursive version of:
@@ -88,7 +109,7 @@ def split_path(path: str) -> List[str]:
88
109
 
89
110
 
90
111
  def fileparts(path: str) -> Tuple[str, str, str]:
91
- """
112
+ r"""
92
113
  Breaks down a path into the directory path, filename, and extension.
93
114
 
94
115
  Note that the '.' lives with the extension, and separators are removed.
@@ -187,7 +208,8 @@ def safe_create_link(link_exists,link_new):
187
208
  it.
188
209
 
189
210
  Errors if link_new already exists but it's not a link.
190
- """
211
+ """
212
+
191
213
  if os.path.exists(link_new) or os.path.islink(link_new):
192
214
  assert os.path.islink(link_new)
193
215
  if not os.readlink(link_new) == link_exists:
@@ -240,7 +262,8 @@ def find_image_strings(strings: Iterable[str]) -> List[str]:
240
262
 
241
263
 
242
264
  def find_images(dirname: str, recursive: bool = False,
243
- return_relative_paths: bool = False, convert_slashes: bool = False) -> List[str]:
265
+ return_relative_paths: bool = False,
266
+ convert_slashes: bool = False) -> List[str]:
244
267
  """
245
268
  Finds all files in a directory that look like image file names. Returns
246
269
  absolute paths unless return_relative_paths is set. Uses the OS-native
@@ -270,11 +293,11 @@ def find_images(dirname: str, recursive: bool = False,
270
293
 
271
294
  def clean_filename(filename: str, allow_list: str = VALID_FILENAME_CHARS,
272
295
  char_limit: int = CHAR_LIMIT, force_lower: bool = False) -> str:
273
- """
296
+ r"""
274
297
  Removes non-ASCII and other invalid filename characters (on any
275
298
  reasonable OS) from a filename, then trims to a maximum length.
276
299
 
277
- Does not allow :\/, use clean_path if you want to preserve those.
300
+ Does not allow :\/ by default, use clean_path if you want to preserve those.
278
301
 
279
302
  Adapted from
280
303
  https://gist.github.com/wassname/1393c4a57cfcbf03641dbc31886123b8
@@ -319,15 +342,71 @@ def flatten_path(pathname: str, separator_chars: str = SEPARATOR_CHARS) -> str:
319
342
 
320
343
  #%% Platform-independent way to open files in their associated application
321
344
 
322
- import sys,subprocess
345
+ import sys,subprocess,platform,re
346
+
347
+ def environment_is_wsl():
348
+ """
349
+ Returns True if we're running in WSL
350
+ """
351
+
352
+ if sys.platform not in ('linux','posix'):
353
+ return False
354
+ platform_string = ' '.join(platform.uname()).lower()
355
+ return 'microsoft' in platform_string and 'wsl' in platform_string
356
+
357
+
358
+ def wsl_path_to_windows_path(filename):
359
+ """
360
+ Converts a WSL path to a Windows path, or returns None if that's not possible. E.g.
361
+ converts:
362
+
363
+ /mnt/e/a/b/c
364
+
365
+ ...to:
366
+
367
+ e:\a\b\c
368
+ """
369
+
370
+ result = subprocess.run(['wslpath', '-w', filename], text=True, capture_output=True)
371
+ if result.returncode != 0:
372
+ print('Could not convert path {} from WSL to Windows'.format(filename))
373
+ return None
374
+ return result.stdout.strip()
375
+
323
376
 
324
- def open_file(filename):
325
- if sys.platform == "win32":
377
+ def open_file(filename,attempt_to_open_in_wsl_host=False):
378
+ """
379
+ Opens [filename] in the native OS file handler. If attempt_to_open_in_wsl_host
380
+ is True, and we're in WSL, attempts to open [filename] in Windows.
381
+ """
382
+
383
+ if sys.platform == 'win32':
384
+
326
385
  os.startfile(filename)
386
+
387
+ elif sys.platform == 'darwin':
388
+
389
+ opener = 'open'
390
+ subprocess.call([opener, filename])
391
+
392
+ elif attempt_to_open_in_wsl_host and environment_is_wsl():
393
+
394
+ windows_path = wsl_path_to_windows_path(filename)
395
+
396
+ # Fall back to xdg-open
397
+ if windows_path is None:
398
+ subprocess.call(['xdg-open', filename])
399
+
400
+ if os.path.isdir(filename):
401
+ subprocess.run(["explorer.exe", windows_path])
402
+ else:
403
+ os.system("cmd.exe /C start %s" % (re.escape(windows_path)))
404
+
327
405
  else:
328
- opener = "open" if sys.platform == "darwin" else "xdg-open"
406
+
407
+ opener = 'xdg-open'
329
408
  subprocess.call([opener, filename])
330
-
409
+
331
410
 
332
411
  #%% File list functions
333
412
 
@@ -403,7 +482,7 @@ def zip_folder(input_folder, output_fn=None, overwrite=False, verbose=False, com
403
482
  relative_filenames = recursive_file_list(input_folder,return_relative_paths=True)
404
483
 
405
484
  with ZipFile(output_fn,'w',zipfile.ZIP_DEFLATED) as zipf:
406
- for input_fn_relative in relative_filenames:
485
+ for input_fn_relative in tqdm(relative_filenames,disable=(not verbose)):
407
486
  input_fn_abs = os.path.join(input_folder,input_fn_relative)
408
487
  zipf.write(input_fn_abs,
409
488
  arcname=input_fn_relative,
@@ -413,19 +492,45 @@ def zip_folder(input_folder, output_fn=None, overwrite=False, verbose=False, com
413
492
  return output_fn
414
493
 
415
494
 
416
- def parallel_zip_files(input_files,max_workers=16):
495
+ def parallel_zip_files(input_files, max_workers=16, use_threads=True):
417
496
  """
418
497
  Zip one or more files to separate output files in parallel, leaving the
419
- original files in place.
498
+ original files in place. Each file is zipped to [filename].zip.
420
499
  """
421
500
 
422
501
  n_workers = min(max_workers,len(input_files))
423
- pool = ThreadPool(n_workers)
502
+
503
+ if use_threads:
504
+ pool = ThreadPool(n_workers)
505
+ else:
506
+ pool = Pool(n_workers)
507
+
424
508
  with tqdm(total=len(input_files)) as pbar:
425
509
  for i,_ in enumerate(pool.imap_unordered(zip_file,input_files)):
426
510
  pbar.update()
427
511
 
428
512
 
513
+ def parallel_zip_folders(input_folders, max_workers=16, use_threads=True,
514
+ compresslevel=9, overwrite=False):
515
+ """
516
+ Zip one or more folders to separate output files in parallel, leaving the
517
+ original folders in place. Each folder is zipped to [folder_name].zip.
518
+ """
519
+
520
+ n_workers = min(max_workers,len(input_folders))
521
+
522
+ if use_threads:
523
+ pool = ThreadPool(n_workers)
524
+ else:
525
+ pool = Pool(n_workers)
526
+
527
+ with tqdm(total=len(input_folders)) as pbar:
528
+ for i,_ in enumerate(pool.imap_unordered(
529
+ partial(zip_folder,overwrite=overwrite,compresslevel=compresslevel),
530
+ input_folders)):
531
+ pbar.update()
532
+
533
+
429
534
  def unzip_file(input_file, output_folder=None):
430
535
  """
431
536
  Unzip a zipfile to the specified output folder, defaulting to the same location as