megadetector 5.0.24__py3-none-any.whl → 5.0.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (41)
  1. megadetector/data_management/cct_json_utils.py +15 -2
  2. megadetector/data_management/coco_to_yolo.py +53 -31
  3. megadetector/data_management/databases/combine_coco_camera_traps_files.py +7 -3
  4. megadetector/data_management/databases/integrity_check_json_db.py +2 -2
  5. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +73 -69
  6. megadetector/data_management/lila/add_locations_to_nacti.py +114 -110
  7. megadetector/data_management/lila/generate_lila_per_image_labels.py +2 -2
  8. megadetector/data_management/lila/test_lila_metadata_urls.py +21 -10
  9. megadetector/data_management/remap_coco_categories.py +60 -11
  10. megadetector/data_management/{wi_to_md.py → speciesnet_to_md.py} +2 -2
  11. megadetector/data_management/yolo_to_coco.py +45 -15
  12. megadetector/detection/run_detector.py +1 -0
  13. megadetector/detection/run_detector_batch.py +5 -4
  14. megadetector/postprocessing/classification_postprocessing.py +788 -524
  15. megadetector/postprocessing/compare_batch_results.py +176 -9
  16. megadetector/postprocessing/create_crop_folder.py +420 -0
  17. megadetector/postprocessing/load_api_results.py +4 -1
  18. megadetector/postprocessing/md_to_coco.py +1 -1
  19. megadetector/postprocessing/postprocess_batch_results.py +158 -44
  20. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +3 -8
  21. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +2 -2
  22. megadetector/postprocessing/separate_detections_into_folders.py +20 -4
  23. megadetector/postprocessing/subset_json_detector_output.py +180 -15
  24. megadetector/postprocessing/validate_batch_results.py +13 -5
  25. megadetector/taxonomy_mapping/map_new_lila_datasets.py +6 -6
  26. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +3 -58
  27. megadetector/taxonomy_mapping/species_lookup.py +45 -2
  28. megadetector/utils/ct_utils.py +76 -3
  29. megadetector/utils/directory_listing.py +4 -4
  30. megadetector/utils/gpu_test.py +21 -3
  31. megadetector/utils/md_tests.py +142 -49
  32. megadetector/utils/path_utils.py +342 -19
  33. megadetector/utils/wi_utils.py +1286 -212
  34. megadetector/visualization/visualization_utils.py +16 -4
  35. megadetector/visualization/visualize_db.py +1 -1
  36. megadetector/visualization/visualize_detector_output.py +1 -4
  37. {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info}/METADATA +6 -3
  38. {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info}/RECORD +41 -40
  39. {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info}/WHEEL +1 -1
  40. {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info/licenses}/LICENSE +0 -0
  41. {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info}/top_level.txt +0 -0
@@ -27,12 +27,14 @@ import re
27
27
 
28
28
  from zipfile import ZipFile
29
29
  from datetime import datetime
30
+ from collections import defaultdict
30
31
  from multiprocessing.pool import Pool, ThreadPool
31
32
  from functools import partial
32
33
  from shutil import which
33
34
  from tqdm import tqdm
34
35
 
35
36
  from megadetector.utils.ct_utils import is_iterable
37
+ from megadetector.utils.ct_utils import sort_dictionary_by_value
36
38
 
37
39
  # Should all be lower-case
38
40
  IMG_EXTENSIONS = ('.jpg', '.jpeg', '.gif', '.png', '.tif', '.tiff', '.bmp')
@@ -51,8 +53,7 @@ def recursive_file_list(base_dir,
51
53
  sort_files=True,
52
54
  recursive=True):
53
55
  r"""
54
- Enumerates files (not directories) in [base_dir], optionally converting
55
- backslahes to slashes
56
+ Enumerates files (not directories) in [base_dir].
56
57
 
57
58
  Args:
58
59
  base_dir (str): folder to enumerate
@@ -94,12 +95,15 @@ def recursive_file_list(base_dir,
94
95
  return all_files
95
96
 
96
97
 
97
- def file_list(base_dir, convert_slashes=True, return_relative_paths=False, sort_files=True,
98
+ def file_list(base_dir,
99
+ convert_slashes=True,
100
+ return_relative_paths=False,
101
+ sort_files=True,
98
102
  recursive=False):
99
103
  """
100
- Trivial wrapper for recursive_file_list, which was a poor function name choice at the time,
101
- since it doesn't really make sense to have a "recursive" option in a function called
102
- "recursive_file_list".
104
+ Trivial wrapper for recursive_file_list, which was a poor function name choice
105
+ at the time, since I later wanted to add non-recursive lists, but it doesn't
106
+ make sense to have a "recursive" option in a function called "recursive_file_list".
103
107
 
104
108
  Args:
105
109
  base_dir (str): folder to enumerate
@@ -119,6 +123,99 @@ def file_list(base_dir, convert_slashes=True, return_relative_paths=False, sort_
119
123
  recursive=recursive)
120
124
 
121
125
 
126
def folder_list(base_dir,
                convert_slashes=True,
                return_relative_paths=False,
                sort_folders=True,
                recursive=False):
    """
    Enumerates folders (not files) in [base_dir].

    Args:
        base_dir (str): folder to enumerate
        convert_slashes (bool, optional): force forward slashes; if this is False, will use
            the native path separator
        return_relative_paths (bool, optional): return paths that are relative to [base_dir],
            rather than absolute paths
        sort_folders (bool, optional): force folders to be sorted, otherwise uses the sorting
            provided by os.walk() (recursive case) or os.listdir() (non-recursive case)
        recursive (bool, optional): enumerate recursively

    Returns:
        list: list of folder names
    """

    assert os.path.isdir(base_dir), '{} is not a folder'.format(base_dir)

    if recursive:
        folders = []
        for root, dirs, _ in os.walk(base_dir):
            for d in dirs:
                folders.append(os.path.join(root, d))
    else:
        # os.listdir returns both files and folders, so filter to folders only
        folders = [os.path.join(base_dir,fn) for fn in os.listdir(base_dir)]
        folders = [fn for fn in folders if os.path.isdir(fn)]

    if return_relative_paths:
        folders = [os.path.relpath(fn,base_dir) for fn in folders]

    if convert_slashes:
        folders = [fn.replace('\\', '/') for fn in folders]

    if sort_folders:
        folders = sorted(folders)

    return folders
173
+
174
+
175
def folder_summary(folder,print_summary=True):
    """
    Returns (and optionally prints) a summary of [folder], including:

    * The total number of files
    * The total number of folders
    * The number of files for each extension

    Args:
        folder (str): folder to summarize
        print_summary (bool, optional): whether to print the summary

    Returns:
        dict: with fields "n_files", "n_folders", and "extension_to_count"
    """

    assert os.path.isdir(folder), '{} is not a folder'.format(folder)

    # Enumerate the folder's contents recursively
    folders_relative = folder_list(folder,return_relative_paths=True,recursive=True)
    files_relative = file_list(folder,return_relative_paths=True,recursive=True)

    # Count files by extension
    extension_to_count = defaultdict(int)
    for fn in files_relative:
        extension_to_count[os.path.splitext(fn)[1]] += 1

    # Present the most common extensions first
    extension_to_count = sort_dictionary_by_value(extension_to_count,reverse=True)

    if print_summary:
        for extension,count in extension_to_count.items():
            print('{}: {}'.format(extension,count))
        print('')
        print('Total files: {}'.format(len(files_relative)))
        print('Total folders: {}'.format(len(folders_relative)))

    return {'n_files':len(files_relative),
            'n_folders':len(folders_relative),
            'extension_to_count':extension_to_count}
217
+
218
+
122
219
  def fileparts(path):
123
220
  r"""
124
221
  Breaks down a path into the directory path, filename, and extension.
@@ -263,6 +360,56 @@ def safe_create_link(link_exists,link_new):
263
360
  os.symlink(link_exists,link_new)
264
361
 
265
362
 
363
def remove_empty_folders(path, remove_root=False):
    """
    Recursively removes empty folders within the specified path.

    Args:
        path (str): the folder from which we should recursively remove
            empty folders.
        remove_root (bool, optional): whether to remove the root directory if
            it's empty after removing all empty subdirectories. This will always
            be True during recursive calls.

    Returns:
        bool: True if the directory is empty after processing, False otherwise
    """

    # Non-folders are never "empty folders"
    if not os.path.isdir(path):
        return False

    # Assume empty until we find a file or a non-removable subfolder
    is_empty = True

    for child in os.listdir(path):

        child_path = os.path.join(path, child)

        if not os.path.isdir(child_path):
            # Any file makes this folder non-empty
            is_empty = False
        elif not remove_empty_folders(child_path, True):
            # A subfolder we couldn't remove makes this folder non-empty too
            is_empty = False

    # Possibly remove this folder itself
    if is_empty and remove_root:
        try:
            os.rmdir(path)
        except Exception as e:
            print('Error removing directory {}: {}'.format(path,str(e)))
            is_empty = False

    return is_empty

# ...def remove_empty_folders(...)
411
+
412
+
266
413
  def top_level_folder(p):
267
414
  r"""
268
415
  Gets the top-level folder from the path *p*.
@@ -547,9 +694,161 @@ def wsl_path_to_windows_path(filename):
547
694
  if result.returncode != 0:
548
695
  print('Could not convert path {} from WSL to Windows'.format(filename))
549
696
  return None
697
+
550
698
  return result.stdout.strip()
551
699
 
700
+
701
def windows_path_to_wsl_path(filename):
    r"""
    Converts a Windows path to a WSL path, or returns None if that's not possible. E.g.
    converts:

    e:\a\b\c

    ...to:

    /mnt/e/a/b/c

    Args:
        filename (str): filename to convert

    Returns:
        str: WSL equivalent to the Windows path [filename], or [filename] if the current
        environment is neither Windows nor WSL.
    """

    # This is a no-op outside of Windows/WSL environments
    if (not environment_is_wsl()) and (os.name != 'nt'):
        return filename

    # Within WSL we can call wslpath directly; on native Windows we invoke it via wsl
    if environment_is_wsl():
        cmd = ['wslpath', '-u', filename]
    else:
        cmd = ['wsl', 'wslpath', '-u', filename]
    result = subprocess.run(cmd, text=True, capture_output=True)

    if result.returncode != 0:
        print('Could not convert path {} from Windows to WSL'.format(filename))
        return None

    return result.stdout.strip()
732
+
733
+
734
def open_file_in_chrome(filename):
    """
    Open a file in chrome, regardless of file type. I typically use this to open
    .md files in Chrome.

    Args:
        filename (str): file to open

    Return:
        bool: whether the operation was successful
    """

    # Create a file:// URL for the target file
    abs_path = os.path.abspath(filename)

    system = platform.system()
    if system == 'Windows':
        url = f'file:///{abs_path.replace(os.sep, "/")}'
    else: # macOS and Linux
        url = f'file://{abs_path}'

    # Determine the Chrome path
    if system == 'Windows':

        # This is a native Python module, but it only exists on Windows
        import winreg

        chrome_paths = [
            os.path.expanduser("~") + r"\AppData\Local\Google\Chrome\Application\chrome.exe",
            r"C:\Program Files\Google\Chrome\Application\chrome.exe",
            r"C:\Program Files (x86)\Google\Chrome\Application\chrome.exe"
        ]

        # Default approach: run from a typical chrome location
        for path in chrome_paths:
            if os.path.exists(path):
                subprocess.run([path, url])
                return True

        # Method 2: Check registry for Chrome path
        try:
            with winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE,
                                r"SOFTWARE\Microsoft\Windows\CurrentVersion\App Paths\chrome.exe") as key:
                chrome_path = winreg.QueryValue(key, None)
                if chrome_path and os.path.exists(chrome_path):
                    subprocess.run([chrome_path, url])
                    return True
        except Exception:
            pass

        # Method 3: Try alternate registry location
        try:
            with winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                                r"Software\Google\Chrome\BLBeacon") as key:
                chrome_path = os.path.join(os.path.dirname(winreg.QueryValueEx(key, "version")[0]), "chrome.exe")
                if os.path.exists(chrome_path):
                    subprocess.run([chrome_path, url])
                    return True
        except Exception:
            pass

        # Method 4: Try system path or command.
        #
        # Under a shell, a missing command does not raise; it yields a nonzero
        # exit code, so we check the return code rather than relying on an
        # exception to fall through to the next candidate.
        for chrome_cmd in ["chrome", "chrome.exe", "googlechrome", "google-chrome"]:
            try:
                result = subprocess.run([chrome_cmd, url], shell=True)
                if result.returncode == 0:
                    return True
            except Exception:
                continue

        # Method 5: Use Windows URL protocol handler
        try:
            os.startfile(url)
            return True
        except Exception:
            pass

        # Method 6: Use rundll32
        try:
            cmd = f'rundll32 url.dll,FileProtocolHandler {url}'
            subprocess.run(cmd, shell=True)
            return True
        except Exception:
            pass

    elif system == 'Darwin':

        chrome_paths = [
            '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
            os.path.expanduser('~/Applications/Google Chrome.app/Contents/MacOS/Google Chrome')
        ]

        for path in chrome_paths:
            if os.path.exists(path):
                subprocess.run([path, url])
                return True

        # Fallback to 'open' command with Chrome as the app
        try:
            subprocess.run(['open', '-a', 'Google Chrome', url])
            return True
        except Exception:
            pass

    elif system == 'Linux':

        chrome_commands = ['google-chrome', 'chrome', 'chromium', 'chromium-browser']

        for cmd in chrome_commands:
            try:
                # A missing command raises FileNotFoundError here (no shell),
                # which moves us on to the next candidate
                subprocess.run([cmd, url], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
                return True
            except Exception:
                continue

    print(f"Could not open {filename} in Chrome on {system}.")
    return False
850
+
851
+
553
852
  def open_file(filename, attempt_to_open_in_wsl_host=False, browser_name=None):
554
853
  """
555
854
  Opens [filename] in the default OS file handler for this file type.
@@ -611,7 +910,7 @@ def open_file(filename, attempt_to_open_in_wsl_host=False, browser_name=None):
611
910
  # ...def open_file(...)
612
911
 
613
912
 
614
- #%% File list functions
913
+ #%% File list functions (as in, files that are lists of other filenames)
615
914
 
616
915
  def write_list_to_file(output_file,strings):
617
916
  """
@@ -650,7 +949,9 @@ def read_list_from_file(filename):
650
949
  return file_list
651
950
 
652
951
 
653
- def _copy_file(input_output_tuple,overwrite=True,verbose=False):
952
+ #%% File copying functions
953
+
954
+ def _copy_file(input_output_tuple,overwrite=True,verbose=False,move=False):
654
955
  """
655
956
  Internal function for copying files from within parallel_copy_files.
656
957
  """
@@ -663,17 +964,29 @@ def _copy_file(input_output_tuple,overwrite=True,verbose=False):
663
964
  print('Skipping existing target file {}'.format(target_fn))
664
965
  return
665
966
 
967
+ if move:
968
+ action_string = 'Moving'
969
+ else:
970
+ action_string = 'Copying'
971
+
666
972
  if verbose:
667
- print('Copying to target file {}'.format(target_fn))
973
+ print('{} to {}'.format(action_string,target_fn))
668
974
 
669
975
  os.makedirs(os.path.dirname(target_fn),exist_ok=True)
670
- shutil.copyfile(source_fn,target_fn)
671
-
976
+ if move:
977
+ shutil.move(source_fn, target_fn)
978
+ else:
979
+ shutil.copyfile(source_fn,target_fn)
980
+
672
981
 
673
- def parallel_copy_files(input_file_to_output_file, max_workers=16,
674
- use_threads=True, overwrite=False, verbose=False):
982
+ def parallel_copy_files(input_file_to_output_file,
983
+ max_workers=16,
984
+ use_threads=True,
985
+ overwrite=False,
986
+ verbose=False,
987
+ move=False):
675
988
  """
676
- Copies files from source to target according to the dict input_file_to_output_file.
989
+ Copy (or move) files from source to target according to the dict input_file_to_output_file.
677
990
 
678
991
  Args:
679
992
  input_file_to_output_file (dict): dictionary mapping source files to the target files
@@ -682,7 +995,8 @@ def parallel_copy_files(input_file_to_output_file, max_workers=16,
682
995
  use_threads (bool, optional): whether to use threads (True) or processes (False) for
683
996
  parallel copying; ignored if max_workers <= 1
684
997
  overwrite (bool, optional): whether to overwrite existing destination files
685
- verbose (bool, optional): enable additional debug output
998
+ verbose (bool, optional): enable additional debug output
999
+ move (bool, optional): move instead of copying
686
1000
  """
687
1001
 
688
1002
  n_workers = min(max_workers,len(input_file_to_output_file))
@@ -698,13 +1012,18 @@ def parallel_copy_files(input_file_to_output_file, max_workers=16,
698
1012
  pool = Pool(n_workers)
699
1013
 
700
1014
  with tqdm(total=len(input_output_tuples)) as pbar:
701
- for i,_ in enumerate(pool.imap_unordered(partial(_copy_file,overwrite=overwrite,verbose=verbose),
1015
+ for i,_ in enumerate(pool.imap_unordered(partial(_copy_file,
1016
+ overwrite=overwrite,
1017
+ verbose=verbose,
1018
+ move=move),
702
1019
  input_output_tuples)):
703
1020
  pbar.update()
704
1021
 
705
1022
  # ...def parallel_copy_files(...)
706
1023
 
707
1024
 
1025
+ #%% File size functions
1026
+
708
1027
  def get_file_sizes(base_dir, convert_slashes=True):
709
1028
  """
710
1029
  Gets sizes recursively for all files in base_dir, returning a dict mapping
@@ -827,7 +1146,7 @@ def parallel_get_file_sizes(filenames,
827
1146
  # ...def parallel_get_file_sizes(...)
828
1147
 
829
1148
 
830
- #%% Zip functions
1149
+ #%% Compression (zip/tar) functions
831
1150
 
832
1151
  def zip_file(input_fn, output_fn=None, overwrite=False, verbose=False, compresslevel=9):
833
1152
  """
@@ -985,8 +1304,12 @@ def zip_folder(input_folder, output_fn=None, overwrite=False, verbose=False, com
985
1304
  return output_fn
986
1305
 
987
1306
 
988
- def parallel_zip_files(input_files, max_workers=16, use_threads=True, compresslevel=9,
989
- overwrite=False, verbose=False):
1307
+ def parallel_zip_files(input_files,
1308
+ max_workers=16,
1309
+ use_threads=True,
1310
+ compresslevel=9,
1311
+ overwrite=False,
1312
+ verbose=False):
990
1313
  """
991
1314
  Zips one or more files to separate output files in parallel, leaving the
992
1315
  original files in place. Each file is zipped to [filename].zip.