megadetector 5.0.24__py3-none-any.whl → 5.0.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (41)
  1. megadetector/data_management/cct_json_utils.py +15 -2
  2. megadetector/data_management/coco_to_yolo.py +53 -31
  3. megadetector/data_management/databases/combine_coco_camera_traps_files.py +7 -3
  4. megadetector/data_management/databases/integrity_check_json_db.py +2 -2
  5. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +73 -69
  6. megadetector/data_management/lila/add_locations_to_nacti.py +114 -110
  7. megadetector/data_management/lila/generate_lila_per_image_labels.py +2 -2
  8. megadetector/data_management/lila/test_lila_metadata_urls.py +21 -10
  9. megadetector/data_management/remap_coco_categories.py +60 -11
  10. megadetector/data_management/{wi_to_md.py → speciesnet_to_md.py} +2 -2
  11. megadetector/data_management/yolo_to_coco.py +45 -15
  12. megadetector/detection/run_detector.py +1 -0
  13. megadetector/detection/run_detector_batch.py +5 -4
  14. megadetector/postprocessing/classification_postprocessing.py +788 -524
  15. megadetector/postprocessing/compare_batch_results.py +176 -9
  16. megadetector/postprocessing/create_crop_folder.py +420 -0
  17. megadetector/postprocessing/load_api_results.py +4 -1
  18. megadetector/postprocessing/md_to_coco.py +1 -1
  19. megadetector/postprocessing/postprocess_batch_results.py +158 -44
  20. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +3 -8
  21. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +2 -2
  22. megadetector/postprocessing/separate_detections_into_folders.py +20 -4
  23. megadetector/postprocessing/subset_json_detector_output.py +180 -15
  24. megadetector/postprocessing/validate_batch_results.py +13 -5
  25. megadetector/taxonomy_mapping/map_new_lila_datasets.py +6 -6
  26. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +3 -58
  27. megadetector/taxonomy_mapping/species_lookup.py +45 -2
  28. megadetector/utils/ct_utils.py +76 -3
  29. megadetector/utils/directory_listing.py +4 -4
  30. megadetector/utils/gpu_test.py +21 -3
  31. megadetector/utils/md_tests.py +142 -49
  32. megadetector/utils/path_utils.py +342 -19
  33. megadetector/utils/wi_utils.py +1286 -212
  34. megadetector/visualization/visualization_utils.py +16 -4
  35. megadetector/visualization/visualize_db.py +1 -1
  36. megadetector/visualization/visualize_detector_output.py +1 -4
  37. {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info}/METADATA +6 -3
  38. {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info}/RECORD +41 -40
  39. {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info}/WHEEL +1 -1
  40. {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info/licenses}/LICENSE +0 -0
  41. {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info}/top_level.txt +0 -0
@@ -27,12 +27,14 @@ import re
27
27
 
28
28
  from zipfile import ZipFile
29
29
  from datetime import datetime
30
+ from collections import defaultdict
30
31
  from multiprocessing.pool import Pool, ThreadPool
31
32
  from functools import partial
32
33
  from shutil import which
33
34
  from tqdm import tqdm
34
35
 
35
36
  from megadetector.utils.ct_utils import is_iterable
37
+ from megadetector.utils.ct_utils import sort_dictionary_by_value
36
38
 
37
39
  # Should all be lower-case
38
40
  IMG_EXTENSIONS = ('.jpg', '.jpeg', '.gif', '.png', '.tif', '.tiff', '.bmp')
@@ -51,8 +53,7 @@ def recursive_file_list(base_dir,
51
53
  sort_files=True,
52
54
  recursive=True):
53
55
  r"""
54
- Enumerates files (not directories) in [base_dir], optionally converting
55
- backslahes to slashes
56
+ Enumerates files (not directories) in [base_dir].
56
57
 
57
58
  Args:
58
59
  base_dir (str): folder to enumerate
@@ -94,12 +95,15 @@ def recursive_file_list(base_dir,
94
95
  return all_files
95
96
 
96
97
 
97
- def file_list(base_dir, convert_slashes=True, return_relative_paths=False, sort_files=True,
98
+ def file_list(base_dir,
99
+ convert_slashes=True,
100
+ return_relative_paths=False,
101
+ sort_files=True,
98
102
  recursive=False):
99
103
  """
100
- Trivial wrapper for recursive_file_list, which was a poor function name choice at the time,
101
- since it doesn't really make sense to have a "recursive" option in a function called
102
- "recursive_file_list".
104
+ Trivial wrapper for recursive_file_list, which was a poor function name choice
105
+ at the time, since I later wanted to add non-recursive lists, but it doesn't
106
+ make sense to have a "recursive" option in a function called "recursive_file_list".
103
107
 
104
108
  Args:
105
109
  base_dir (str): folder to enumerate
@@ -119,6 +123,99 @@ def file_list(base_dir, convert_slashes=True, return_relative_paths=False, sort_
119
123
  recursive=recursive)
120
124
 
121
125
 
126
def folder_list(base_dir,
                convert_slashes=True,
                return_relative_paths=False,
                sort_folders=True,
                recursive=False):
    """
    Enumerates folders (not files) in [base_dir].

    Args:
        base_dir (str): folder to enumerate
        convert_slashes (bool, optional): force forward slashes; if this is False, will use
            the native path separator
        return_relative_paths (bool, optional): return paths that are relative to [base_dir],
            rather than absolute paths
        sort_folders (bool, optional): force folders to be sorted, otherwise uses the sorting
            provided by os.walk() (recursive case) or os.listdir() (non-recursive case)
        recursive (bool, optional): enumerate recursively

    Returns:
        list: list of folder names
    """

    assert os.path.isdir(base_dir), '{} is not a folder'.format(base_dir)

    if recursive:
        folders = []
        for root, dirs, _ in os.walk(base_dir):
            for d in dirs:
                folders.append(os.path.join(root, d))
    else:
        # os.listdir returns both files and folders, so filter to folders only
        folders = [os.path.join(base_dir,fn) for fn in os.listdir(base_dir)]
        folders = [fn for fn in folders if os.path.isdir(fn)]

    if return_relative_paths:
        folders = [os.path.relpath(fn,base_dir) for fn in folders]

    if convert_slashes:
        folders = [fn.replace('\\', '/') for fn in folders]

    if sort_folders:
        folders = sorted(folders)

    return folders
173
+
174
+
175
def folder_summary(folder,print_summary=True):
    """
    Returns (and optionally prints) a summary of [folder], including:

    * The total number of files
    * The total number of folders
    * The number of files for each extension

    Args:
        folder (str): folder to summarize
        print_summary (bool, optional): whether to print the summary

    Returns:
        dict: with fields "n_files", "n_folders", and "extension_to_count"
    """

    assert os.path.isdir(folder), '{} is not a folder'.format(folder)

    # Enumerate the folder's contents recursively
    folders_relative = folder_list(folder,return_relative_paths=True,recursive=True)
    files_relative = file_list(folder,return_relative_paths=True,recursive=True)

    # Count files by extension
    extension_to_count = defaultdict(int)
    for fn in files_relative:
        extension_to_count[os.path.splitext(fn)[1]] += 1

    # Present the most common extensions first
    extension_to_count = sort_dictionary_by_value(extension_to_count,reverse=True)

    if print_summary:
        for extension,count in extension_to_count.items():
            print('{}: {}'.format(extension,count))
        print('')
        print('Total files: {}'.format(len(files_relative)))
        print('Total folders: {}'.format(len(folders_relative)))

    return {'n_files':len(files_relative),
            'n_folders':len(folders_relative),
            'extension_to_count':extension_to_count}
217
+
218
+
122
219
  def fileparts(path):
123
220
  r"""
124
221
  Breaks down a path into the directory path, filename, and extension.
@@ -263,6 +360,56 @@ def safe_create_link(link_exists,link_new):
263
360
  os.symlink(link_exists,link_new)
264
361
 
265
362
 
363
def remove_empty_folders(path, remove_root=False):
    """
    Recursively removes empty folders within the specified path.

    Args:
        path (str): the folder from which we should recursively remove
            empty folders.
        remove_root (bool, optional): whether to remove the root directory if
            it's empty after removing all empty subdirectories. This will always
            be True during recursive calls.

    Returns:
        bool: True if the directory is empty after processing, False otherwise
    """

    # Non-folders are never "empty folders"
    if not os.path.isdir(path):
        return False

    # Assume empty until we find a file or a non-removable subfolder
    is_empty = True

    for child in os.listdir(path):

        child_path = os.path.join(path, child)

        if not os.path.isdir(child_path):
            # Any file makes this folder non-empty
            is_empty = False
        elif not remove_empty_folders(child_path, True):
            # A subfolder we couldn't remove makes this folder non-empty too
            is_empty = False

    # Possibly remove this folder itself
    if is_empty and remove_root:
        try:
            os.rmdir(path)
        except Exception as e:
            print('Error removing directory {}: {}'.format(path,str(e)))
            is_empty = False

    return is_empty

# ...def remove_empty_folders(...)
411
+
412
+
266
413
  def top_level_folder(p):
267
414
  r"""
268
415
  Gets the top-level folder from the path *p*.
@@ -547,9 +694,161 @@ def wsl_path_to_windows_path(filename):
547
694
  if result.returncode != 0:
548
695
  print('Could not convert path {} from WSL to Windows'.format(filename))
549
696
  return None
697
+
550
698
  return result.stdout.strip()
551
699
 
700
+
701
def windows_path_to_wsl_path(filename):
    r"""
    Converts a Windows path to a WSL path, or returns None if that's not possible. E.g.
    converts:

    e:\a\b\c

    ...to:

    /mnt/e/a/b/c

    Args:
        filename (str): filename to convert

    Returns:
        str: WSL equivalent to the Windows path [filename], or [filename] if the current
        environment is neither Windows nor WSL.
    """

    # This is a no-op outside of Windows/WSL environments
    if (not environment_is_wsl()) and (os.name != 'nt'):
        return filename

    # Within WSL we can call wslpath directly; on native Windows we invoke it via wsl
    if environment_is_wsl():
        cmd = ['wslpath', '-u', filename]
    else:
        cmd = ['wsl', 'wslpath', '-u', filename]
    result = subprocess.run(cmd, text=True, capture_output=True)

    if result.returncode != 0:
        print('Could not convert path {} from Windows to WSL'.format(filename))
        return None

    return result.stdout.strip()
732
+
733
+
734
def open_file_in_chrome(filename):
    """
    Open a file in chrome, regardless of file type. I typically use this to open
    .md files in Chrome.

    Args:
        filename (str): file to open

    Return:
        bool: whether the operation was successful
    """

    # Create a file:// URL for the target file
    abs_path = os.path.abspath(filename)

    system = platform.system()
    if system == 'Windows':
        url = f'file:///{abs_path.replace(os.sep, "/")}'
    else: # macOS and Linux
        url = f'file://{abs_path}'

    # Determine the Chrome path
    if system == 'Windows':

        # This is a native Python module, but it only exists on Windows
        import winreg

        chrome_paths = [
            os.path.expanduser("~") + r"\AppData\Local\Google\Chrome\Application\chrome.exe",
            r"C:\Program Files\Google\Chrome\Application\chrome.exe",
            r"C:\Program Files (x86)\Google\Chrome\Application\chrome.exe"
        ]

        # Default approach: run from a typical chrome location
        for path in chrome_paths:
            if os.path.exists(path):
                subprocess.run([path, url])
                return True

        # Method 2: Check registry for Chrome path
        try:
            with winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE,
                                r"SOFTWARE\Microsoft\Windows\CurrentVersion\App Paths\chrome.exe") as key:
                chrome_path = winreg.QueryValue(key, None)
                if chrome_path and os.path.exists(chrome_path):
                    subprocess.run([chrome_path, url])
                    return True
        except Exception:
            pass

        # Method 3: Try alternate registry location
        try:
            with winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                                r"Software\Google\Chrome\BLBeacon") as key:
                chrome_path = os.path.join(os.path.dirname(winreg.QueryValueEx(key, "version")[0]), "chrome.exe")
                if os.path.exists(chrome_path):
                    subprocess.run([chrome_path, url])
                    return True
        except Exception:
            pass

        # Method 4: Try system path or command.
        #
        # Under a shell, a missing command does not raise; it yields a nonzero
        # exit code, so we check the return code rather than relying on an
        # exception to fall through to the next candidate.
        for chrome_cmd in ["chrome", "chrome.exe", "googlechrome", "google-chrome"]:
            try:
                result = subprocess.run([chrome_cmd, url], shell=True)
                if result.returncode == 0:
                    return True
            except Exception:
                continue

        # Method 5: Use Windows URL protocol handler
        try:
            os.startfile(url)
            return True
        except Exception:
            pass

        # Method 6: Use rundll32
        try:
            cmd = f'rundll32 url.dll,FileProtocolHandler {url}'
            subprocess.run(cmd, shell=True)
            return True
        except Exception:
            pass

    elif system == 'Darwin':

        chrome_paths = [
            '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
            os.path.expanduser('~/Applications/Google Chrome.app/Contents/MacOS/Google Chrome')
        ]

        for path in chrome_paths:
            if os.path.exists(path):
                subprocess.run([path, url])
                return True

        # Fallback to 'open' command with Chrome as the app
        try:
            subprocess.run(['open', '-a', 'Google Chrome', url])
            return True
        except Exception:
            pass

    elif system == 'Linux':

        chrome_commands = ['google-chrome', 'chrome', 'chromium', 'chromium-browser']

        for cmd in chrome_commands:
            try:
                # A missing command raises FileNotFoundError here (no shell),
                # which moves us on to the next candidate
                subprocess.run([cmd, url], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
                return True
            except Exception:
                continue

    print(f"Could not open {filename} in Chrome on {system}.")
    return False
850
+
851
+
553
852
  def open_file(filename, attempt_to_open_in_wsl_host=False, browser_name=None):
554
853
  """
555
854
  Opens [filename] in the default OS file handler for this file type.
@@ -611,7 +910,7 @@ def open_file(filename, attempt_to_open_in_wsl_host=False, browser_name=None):
611
910
  # ...def open_file(...)
612
911
 
613
912
 
614
- #%% File list functions
913
+ #%% File list functions (as in, files that are lists of other filenames)
615
914
 
616
915
  def write_list_to_file(output_file,strings):
617
916
  """
@@ -650,7 +949,9 @@ def read_list_from_file(filename):
650
949
  return file_list
651
950
 
652
951
 
653
- def _copy_file(input_output_tuple,overwrite=True,verbose=False):
952
+ #%% File copying functions
953
+
954
+ def _copy_file(input_output_tuple,overwrite=True,verbose=False,move=False):
654
955
  """
655
956
  Internal function for copying files from within parallel_copy_files.
656
957
  """
@@ -663,17 +964,29 @@ def _copy_file(input_output_tuple,overwrite=True,verbose=False):
663
964
  print('Skipping existing target file {}'.format(target_fn))
664
965
  return
665
966
 
967
+ if move:
968
+ action_string = 'Moving'
969
+ else:
970
+ action_string = 'Copying'
971
+
666
972
  if verbose:
667
- print('Copying to target file {}'.format(target_fn))
973
+ print('{} to {}'.format(action_string,target_fn))
668
974
 
669
975
  os.makedirs(os.path.dirname(target_fn),exist_ok=True)
670
- shutil.copyfile(source_fn,target_fn)
671
-
976
+ if move:
977
+ shutil.move(source_fn, target_fn)
978
+ else:
979
+ shutil.copyfile(source_fn,target_fn)
980
+
672
981
 
673
- def parallel_copy_files(input_file_to_output_file, max_workers=16,
674
- use_threads=True, overwrite=False, verbose=False):
982
+ def parallel_copy_files(input_file_to_output_file,
983
+ max_workers=16,
984
+ use_threads=True,
985
+ overwrite=False,
986
+ verbose=False,
987
+ move=False):
675
988
  """
676
- Copies files from source to target according to the dict input_file_to_output_file.
989
+ Copy (or move) files from source to target according to the dict input_file_to_output_file.
677
990
 
678
991
  Args:
679
992
  input_file_to_output_file (dict): dictionary mapping source files to the target files
@@ -682,7 +995,8 @@ def parallel_copy_files(input_file_to_output_file, max_workers=16,
682
995
  use_threads (bool, optional): whether to use threads (True) or processes (False) for
683
996
  parallel copying; ignored if max_workers <= 1
684
997
  overwrite (bool, optional): whether to overwrite existing destination files
685
- verbose (bool, optional): enable additional debug output
998
+ verbose (bool, optional): enable additional debug output
999
+ move (bool, optional): move instead of copying
686
1000
  """
687
1001
 
688
1002
  n_workers = min(max_workers,len(input_file_to_output_file))
@@ -698,13 +1012,18 @@ def parallel_copy_files(input_file_to_output_file, max_workers=16,
698
1012
  pool = Pool(n_workers)
699
1013
 
700
1014
  with tqdm(total=len(input_output_tuples)) as pbar:
701
- for i,_ in enumerate(pool.imap_unordered(partial(_copy_file,overwrite=overwrite,verbose=verbose),
1015
+ for i,_ in enumerate(pool.imap_unordered(partial(_copy_file,
1016
+ overwrite=overwrite,
1017
+ verbose=verbose,
1018
+ move=move),
702
1019
  input_output_tuples)):
703
1020
  pbar.update()
704
1021
 
705
1022
  # ...def parallel_copy_files(...)
706
1023
 
707
1024
 
1025
+ #%% File size functions
1026
+
708
1027
  def get_file_sizes(base_dir, convert_slashes=True):
709
1028
  """
710
1029
  Gets sizes recursively for all files in base_dir, returning a dict mapping
@@ -827,7 +1146,7 @@ def parallel_get_file_sizes(filenames,
827
1146
  # ...def parallel_get_file_sizes(...)
828
1147
 
829
1148
 
830
- #%% Zip functions
1149
+ #%% Compression (zip/tar) functions
831
1150
 
832
1151
  def zip_file(input_fn, output_fn=None, overwrite=False, verbose=False, compresslevel=9):
833
1152
  """
@@ -985,8 +1304,12 @@ def zip_folder(input_folder, output_fn=None, overwrite=False, verbose=False, com
985
1304
  return output_fn
986
1305
 
987
1306
 
988
- def parallel_zip_files(input_files, max_workers=16, use_threads=True, compresslevel=9,
989
- overwrite=False, verbose=False):
1307
+ def parallel_zip_files(input_files,
1308
+ max_workers=16,
1309
+ use_threads=True,
1310
+ compresslevel=9,
1311
+ overwrite=False,
1312
+ verbose=False):
990
1313
  """
991
1314
  Zips one or more files to separate output files in parallel, leaving the
992
1315
  original files in place. Each file is zipped to [filename].zip.