megadetector 5.0.25__py3-none-any.whl → 5.0.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (34) hide show
  1. megadetector/data_management/cct_json_utils.py +15 -2
  2. megadetector/data_management/coco_to_yolo.py +53 -31
  3. megadetector/data_management/databases/combine_coco_camera_traps_files.py +7 -3
  4. megadetector/data_management/databases/integrity_check_json_db.py +2 -2
  5. megadetector/data_management/lila/generate_lila_per_image_labels.py +2 -2
  6. megadetector/data_management/lila/test_lila_metadata_urls.py +21 -10
  7. megadetector/data_management/remap_coco_categories.py +60 -11
  8. megadetector/data_management/yolo_to_coco.py +45 -15
  9. megadetector/postprocessing/classification_postprocessing.py +788 -524
  10. megadetector/postprocessing/create_crop_folder.py +95 -33
  11. megadetector/postprocessing/load_api_results.py +4 -1
  12. megadetector/postprocessing/md_to_coco.py +1 -1
  13. megadetector/postprocessing/postprocess_batch_results.py +156 -42
  14. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +3 -8
  15. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +2 -2
  16. megadetector/postprocessing/separate_detections_into_folders.py +20 -4
  17. megadetector/postprocessing/subset_json_detector_output.py +180 -15
  18. megadetector/postprocessing/validate_batch_results.py +13 -5
  19. megadetector/taxonomy_mapping/map_new_lila_datasets.py +6 -6
  20. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +3 -58
  21. megadetector/taxonomy_mapping/species_lookup.py +45 -2
  22. megadetector/utils/ct_utils.py +4 -2
  23. megadetector/utils/directory_listing.py +1 -1
  24. megadetector/utils/md_tests.py +2 -1
  25. megadetector/utils/path_utils.py +308 -19
  26. megadetector/utils/wi_utils.py +363 -186
  27. megadetector/visualization/visualization_utils.py +2 -1
  28. megadetector/visualization/visualize_db.py +1 -1
  29. megadetector/visualization/visualize_detector_output.py +1 -4
  30. {megadetector-5.0.25.dist-info → megadetector-5.0.26.dist-info}/METADATA +4 -3
  31. {megadetector-5.0.25.dist-info → megadetector-5.0.26.dist-info}/RECORD +34 -34
  32. {megadetector-5.0.25.dist-info → megadetector-5.0.26.dist-info}/WHEEL +1 -1
  33. {megadetector-5.0.25.dist-info → megadetector-5.0.26.dist-info/licenses}/LICENSE +0 -0
  34. {megadetector-5.0.25.dist-info → megadetector-5.0.26.dist-info}/top_level.txt +0 -0
@@ -27,12 +27,14 @@ import re
27
27
 
28
28
  from zipfile import ZipFile
29
29
  from datetime import datetime
30
+ from collections import defaultdict
30
31
  from multiprocessing.pool import Pool, ThreadPool
31
32
  from functools import partial
32
33
  from shutil import which
33
34
  from tqdm import tqdm
34
35
 
35
36
  from megadetector.utils.ct_utils import is_iterable
37
+ from megadetector.utils.ct_utils import sort_dictionary_by_value
36
38
 
37
39
  # Should all be lower-case
38
40
  IMG_EXTENSIONS = ('.jpg', '.jpeg', '.gif', '.png', '.tif', '.tiff', '.bmp')
@@ -51,8 +53,7 @@ def recursive_file_list(base_dir,
51
53
  sort_files=True,
52
54
  recursive=True):
53
55
  r"""
54
- Enumerates files (not directories) in [base_dir], optionally converting
55
- backslahes to slashes
56
+ Enumerates files (not directories) in [base_dir].
56
57
 
57
58
  Args:
58
59
  base_dir (str): folder to enumerate
@@ -94,12 +95,15 @@ def recursive_file_list(base_dir,
94
95
  return all_files
95
96
 
96
97
 
97
- def file_list(base_dir, convert_slashes=True, return_relative_paths=False, sort_files=True,
98
+ def file_list(base_dir,
99
+ convert_slashes=True,
100
+ return_relative_paths=False,
101
+ sort_files=True,
98
102
  recursive=False):
99
103
  """
100
- Trivial wrapper for recursive_file_list, which was a poor function name choice at the time,
101
- since it doesn't really make sense to have a "recursive" option in a function called
102
- "recursive_file_list".
104
+ Trivial wrapper for recursive_file_list, which was a poor function name choice
105
+ at the time, since I later wanted to add non-recursive lists, but it doesn't
106
+ make sense to have a "recursive" option in a function called "recursive_file_list".
103
107
 
104
108
  Args:
105
109
  base_dir (str): folder to enumerate
@@ -119,6 +123,99 @@ def file_list(base_dir, convert_slashes=True, return_relative_paths=False, sort_
119
123
  recursive=recursive)
120
124
 
121
125
 
126
def folder_list(base_dir,
                convert_slashes=True,
                return_relative_paths=False,
                sort_folders=True,
                recursive=False):
    """
    Enumerates folders (not files) in [base_dir].

    Args:
        base_dir (str): folder to enumerate
        convert_slashes (bool, optional): force forward slashes; if this is False, will use
            the native path separator
        return_relative_paths (bool, optional): return paths that are relative to [base_dir],
            rather than absolute paths
        sort_folders (bool, optional): force folders to be sorted, otherwise uses the sorting
            provided by os.walk()
        recursive (bool, optional): enumerate recursively

    Returns:
        list: list of folder names
    """

    assert os.path.isdir(base_dir), '{} is not a folder'.format(base_dir)

    if recursive:
        # os.walk() yields every directory at every depth; collect them all
        folders = []
        for root, dirs, _ in os.walk(base_dir):
            for d in dirs:
                folders.append(os.path.join(root, d))
    else:
        # Only immediate children, filtered down to directories
        folders = os.listdir(base_dir)
        folders = [os.path.join(base_dir,fn) for fn in folders]
        folders = [fn for fn in folders if os.path.isdir(fn)]

    if return_relative_paths:
        folders = [os.path.relpath(fn,base_dir) for fn in folders]

    if convert_slashes:
        folders = [fn.replace('\\', '/') for fn in folders]

    if sort_folders:
        folders = sorted(folders)

    return folders
173
+
174
+
175
def folder_summary(folder,print_summary=True):
    """
    Returns (and optionally prints) a summary of [folder], including:

    * The total number of files
    * The total number of folders
    * The number of files for each extension

    Args:
        folder (str): folder to summarize
        print_summary (bool, optional): whether to print the summary

    Returns:
        dict: with fields "n_files", "n_folders", and "extension_to_count"
    """

    assert os.path.isdir(folder), '{} is not a folder'.format(folder)

    # Recursively enumerate everything under [folder]
    relative_folders = folder_list(folder,return_relative_paths=True,recursive=True)
    relative_files = file_list(folder,return_relative_paths=True,recursive=True)

    # Tally files by extension
    extension_to_count = defaultdict(int)
    for relative_file in relative_files:
        extension_to_count[os.path.splitext(relative_file)[1]] += 1

    # Most common extensions first
    extension_to_count = sort_dictionary_by_value(extension_to_count,reverse=True)

    if print_summary:
        for ext in extension_to_count.keys():
            print('{}: {}'.format(ext,extension_to_count[ext]))
        print('')
        print('Total files: {}'.format(len(relative_files)))
        print('Total folders: {}'.format(len(relative_folders)))

    return {'n_files':len(relative_files),
            'n_folders':len(relative_folders),
            'extension_to_count':extension_to_count}
217
+
218
+
122
219
  def fileparts(path):
123
220
  r"""
124
221
  Breaks down a path into the directory path, filename, and extension.
@@ -263,6 +360,56 @@ def safe_create_link(link_exists,link_new):
263
360
  os.symlink(link_exists,link_new)
264
361
 
265
362
 
363
def remove_empty_folders(path, remove_root=False):
    """
    Recursively removes empty folders within the specified path.

    Args:
        path (str): the folder from which we should recursively remove
            empty folders.
        remove_root (bool, optional): whether to remove the root directory if
            it's empty after removing all empty subdirectories. This will always
            be True during recursive calls.

    Returns:
        bool: True if the directory is empty after processing, False otherwise
    """

    # Non-directories (including paths that don't exist) are never "empty folders"
    if not os.path.isdir(path):
        return False

    # Assume the folder is empty until we see a file, or a subfolder we can't remove
    is_empty = True

    for entry in os.listdir(path):

        entry_path = os.path.join(path, entry)

        if os.path.isdir(entry_path):
            # Recurse; subfolders are always removal candidates, so pass
            # remove_root=True.  A subfolder that survives means we're not empty.
            if not remove_empty_folders(entry_path, True):
                is_empty = False
        else:
            # Any file makes this folder non-empty
            is_empty = False

    # Remove this folder if it ended up empty and removal was requested
    if is_empty and remove_root:
        try:
            os.rmdir(path)
        except Exception as e:
            print('Error removing directory {}: {}'.format(path,str(e)))
            is_empty = False

    return is_empty

# ...def remove_empty_folders(...)
411
+
412
+
266
413
  def top_level_folder(p):
267
414
  r"""
268
415
  Gets the top-level folder from the path *p*.
@@ -582,8 +729,126 @@ def windows_path_to_wsl_path(filename):
582
729
  return None
583
730
 
584
731
  return result.stdout.strip()
732
+
585
733
 
734
def open_file_in_chrome(filename):
    """
    Open a file in chrome, regardless of file type. I typically use this to open
    .md files in Chrome.

    Args:
        filename (str): file to open

    Return:
        bool: whether the operation was successful
    """

    # Create a file:// URL
    abs_path = os.path.abspath(filename)

    system = platform.system()
    if system == 'Windows':
        url = f'file:///{abs_path.replace(os.sep, "/")}'
    else: # macOS and Linux
        url = f'file://{abs_path}'

    # Determine the Chrome path
    if system == 'Windows':

        # This is a native Python module, but it only exists on Windows
        import winreg

        chrome_paths = [
            os.path.expanduser("~") + r"\AppData\Local\Google\Chrome\Application\chrome.exe",
            r"C:\Program Files\Google\Chrome\Application\chrome.exe",
            r"C:\Program Files (x86)\Google\Chrome\Application\chrome.exe"
        ]

        # Default approach: run from a typical chrome location
        for path in chrome_paths:
            if os.path.exists(path):
                subprocess.run([path, url])
                return True

        # Method 2: Check registry for Chrome path
        try:
            with winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE,
                    r"SOFTWARE\Microsoft\Windows\CurrentVersion\App Paths\chrome.exe") as key:
                chrome_path = winreg.QueryValue(key, None)
                if chrome_path and os.path.exists(chrome_path):
                    subprocess.run([chrome_path, url])
                    return True
        except Exception:
            pass

        # Method 3: Try alternate registry location
        #
        # NOTE(review): dirname() of the BLBeacon "version" value is typically empty,
        # so this rarely resolves to a real chrome.exe; kept as a best-effort fallback.
        try:
            with winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                    r"Software\Google\Chrome\BLBeacon") as key:
                chrome_path = os.path.join(os.path.dirname(winreg.QueryValueEx(key, "version")[0]), "chrome.exe")
                if os.path.exists(chrome_path):
                    subprocess.run([chrome_path, url])
                    return True
        except Exception:
            pass

        # Method 4: Try system path or command.  With shell=True a missing command
        # does not raise, so check the return code rather than assuming success.
        for chrome_cmd in ["chrome", "chrome.exe", "googlechrome", "google-chrome"]:
            try:
                result = subprocess.run([chrome_cmd, url], shell=True)
                if result.returncode == 0:
                    return True
            except Exception:
                continue

        # Method 5: Use Windows URL protocol handler
        try:
            os.startfile(url)
            return True
        except Exception:
            pass

        # Method 6: Use rundll32
        try:
            cmd = f'rundll32 url.dll,FileProtocolHandler {url}'
            subprocess.run(cmd, shell=True)
            return True
        except Exception:
            pass

    elif system == 'Darwin':

        chrome_paths = [
            '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
            os.path.expanduser('~/Applications/Google Chrome.app/Contents/MacOS/Google Chrome')
        ]

        for path in chrome_paths:
            if os.path.exists(path):
                subprocess.run([path, url])
                return True

        # Fallback to 'open' command with Chrome as the app; 'open' exits non-zero
        # when the application is not installed, so check the return code.
        try:
            result = subprocess.run(['open', '-a', 'Google Chrome', url])
            if result.returncode == 0:
                return True
        except Exception:
            pass

    elif system == 'Linux':

        chrome_commands = ['google-chrome', 'chrome', 'chromium', 'chromium-browser']

        for cmd in chrome_commands:
            try:
                # A missing command raises FileNotFoundError (caught below); a present
                # command that fails exits non-zero, so check the return code.
                result = subprocess.run([cmd, url], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
                if result.returncode == 0:
                    return True
            except Exception:
                continue

    # Previously printed a literal "(unknown)" here; include the actual filename
    print(f"Could not open {filename} in Chrome on {system}.")
    return False
850
+
851
+
587
852
  def open_file(filename, attempt_to_open_in_wsl_host=False, browser_name=None):
588
853
  """
589
854
  Opens [filename] in the default OS file handler for this file type.
@@ -645,7 +910,7 @@ def open_file(filename, attempt_to_open_in_wsl_host=False, browser_name=None):
645
910
  # ...def open_file(...)
646
911
 
647
912
 
648
- #%% File list functions
913
+ #%% File list functions (as in, files that are lists of other filenames)
649
914
 
650
915
  def write_list_to_file(output_file,strings):
651
916
  """
@@ -684,7 +949,9 @@ def read_list_from_file(filename):
684
949
  return file_list
685
950
 
686
951
 
687
- def _copy_file(input_output_tuple,overwrite=True,verbose=False):
952
+ #%% File copying functions
953
+
954
+ def _copy_file(input_output_tuple,overwrite=True,verbose=False,move=False):
688
955
  """
689
956
  Internal function for copying files from within parallel_copy_files.
690
957
  """
@@ -697,17 +964,29 @@ def _copy_file(input_output_tuple,overwrite=True,verbose=False):
697
964
  print('Skipping existing target file {}'.format(target_fn))
698
965
  return
699
966
 
967
+ if move:
968
+ action_string = 'Moving'
969
+ else:
970
+ action_string = 'Copying'
971
+
700
972
  if verbose:
701
- print('Copying to target file {}'.format(target_fn))
973
+ print('{} to {}'.format(action_string,target_fn))
702
974
 
703
975
  os.makedirs(os.path.dirname(target_fn),exist_ok=True)
704
- shutil.copyfile(source_fn,target_fn)
705
-
976
+ if move:
977
+ shutil.move(source_fn, target_fn)
978
+ else:
979
+ shutil.copyfile(source_fn,target_fn)
980
+
706
981
 
707
- def parallel_copy_files(input_file_to_output_file, max_workers=16,
708
- use_threads=True, overwrite=False, verbose=False):
982
+ def parallel_copy_files(input_file_to_output_file,
983
+ max_workers=16,
984
+ use_threads=True,
985
+ overwrite=False,
986
+ verbose=False,
987
+ move=False):
709
988
  """
710
- Copies files from source to target according to the dict input_file_to_output_file.
989
+ Copy (or move) files from source to target according to the dict input_file_to_output_file.
711
990
 
712
991
  Args:
713
992
  input_file_to_output_file (dict): dictionary mapping source files to the target files
@@ -716,7 +995,8 @@ def parallel_copy_files(input_file_to_output_file, max_workers=16,
716
995
  use_threads (bool, optional): whether to use threads (True) or processes (False) for
717
996
  parallel copying; ignored if max_workers <= 1
718
997
  overwrite (bool, optional): whether to overwrite existing destination files
719
- verbose (bool, optional): enable additional debug output
998
+ verbose (bool, optional): enable additional debug output
999
+ move (bool, optional): move instead of copying
720
1000
  """
721
1001
 
722
1002
  n_workers = min(max_workers,len(input_file_to_output_file))
@@ -732,13 +1012,18 @@ def parallel_copy_files(input_file_to_output_file, max_workers=16,
732
1012
  pool = Pool(n_workers)
733
1013
 
734
1014
  with tqdm(total=len(input_output_tuples)) as pbar:
735
- for i,_ in enumerate(pool.imap_unordered(partial(_copy_file,overwrite=overwrite,verbose=verbose),
1015
+ for i,_ in enumerate(pool.imap_unordered(partial(_copy_file,
1016
+ overwrite=overwrite,
1017
+ verbose=verbose,
1018
+ move=move),
736
1019
  input_output_tuples)):
737
1020
  pbar.update()
738
1021
 
739
1022
  # ...def parallel_copy_files(...)
740
1023
 
741
1024
 
1025
+ #%% File size functions
1026
+
742
1027
  def get_file_sizes(base_dir, convert_slashes=True):
743
1028
  """
744
1029
  Gets sizes recursively for all files in base_dir, returning a dict mapping
@@ -861,7 +1146,7 @@ def parallel_get_file_sizes(filenames,
861
1146
  # ...def parallel_get_file_sizes(...)
862
1147
 
863
1148
 
864
- #%% Zip functions
1149
+ #%% Compression (zip/tar) functions
865
1150
 
866
1151
  def zip_file(input_fn, output_fn=None, overwrite=False, verbose=False, compresslevel=9):
867
1152
  """
@@ -1019,8 +1304,12 @@ def zip_folder(input_folder, output_fn=None, overwrite=False, verbose=False, com
1019
1304
  return output_fn
1020
1305
 
1021
1306
 
1022
- def parallel_zip_files(input_files, max_workers=16, use_threads=True, compresslevel=9,
1023
- overwrite=False, verbose=False):
1307
+ def parallel_zip_files(input_files,
1308
+ max_workers=16,
1309
+ use_threads=True,
1310
+ compresslevel=9,
1311
+ overwrite=False,
1312
+ verbose=False):
1024
1313
  """
1025
1314
  Zips one or more files to separate output files in parallel, leaving the
1026
1315
  original files in place. Each file is zipped to [filename].zip.