PyPI - megadetector - Versions diffs - 5.0.27__py3-none-any.whl → 5.0.28__py3-none-any.whl - Mend - Supply Chain Defender

megadetector 5.0.27__py3-none-any.whl → 5.0.28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of megadetector might be problematic. Click here for more details.

Files changed (26) hide show

megadetector/utils/path_utils.py CHANGED Viewed

@@ -72,7 +72,7 @@ def recursive_file_list(base_dir,
     assert os.path.isdir(base_dir), '{} is not a folder'.format(base_dir)
     all_files = []
     if recursive:
         for root, _, filenames in os.walk(base_dir):
             for filename in filenames:
@@ -454,6 +454,25 @@ def top_level_folder(p):
 # ...top_level_folder()
+def path_join(*paths, convert_slashes=True):
+    r"""
+    Wrapper for os.path.join that optionally converts backslashes to forward slashes.
+    Args:
+        *paths (variable-length set of strings): Path components to be joined.
+        convert_slashes (bool, optional): whether to convert \\ to /
+    Returns:
+        A string with the joined path components.
+    """
+    joined_path = os.path.join(*paths)
+    if convert_slashes:
+        return joined_path.replace('\\', '/')
+    else:
+        return joined_path
 #%% Test driver for top_level_folder
 if False:
@@ -665,10 +684,9 @@ def environment_is_wsl():
     return 'microsoft' in platform_string and 'wsl' in platform_string
-def wsl_path_to_windows_path(filename):
+def wsl_path_to_windows_path(filename, failure_behavior='none'):
     r"""
-    Converts a WSL path to a Windows path, or returns None if that's not possible.  E.g.
-    converts:
+    Converts a WSL path to a Windows path.  For example, converts:
     /mnt/e/a/b/c
@@ -678,27 +696,42 @@ def wsl_path_to_windows_path(filename):
     Args:
         filename (str): filename to convert
+        failure_behavior (str): what to do if the path can't be processed as a WSL path.
+            'none' to return None in this case, 'original' to return the original path.
     Returns:
-        str: Windows equivalent to the WSL path [filename], or [filename] if the current
-        environment is neither Windows nor WSL.
+        str: Windows equivalent to the WSL path [filename]
     """
-    if (not environment_is_wsl()) and (os.name != 'nt'):
-        return filename
+    assert failure_behavior in ('none','original'), \
+        'Unrecognized failure_behavior value {}'.format(failure_behavior)
-    if environment_is_wsl():
-        result = subprocess.run(['wslpath', '-w', filename], text=True, capture_output=True)
-    else:
-        result = subprocess.run(['wsl', 'wslpath', '-w', filename], text=True, capture_output=True)
-    if result.returncode != 0:
-        print('Could not convert path {} from WSL to Windows'.format(filename))
-        return None
+    # Check whether the path follows the standard WSL mount pattern
+    wsl_path_pattern = r'^/mnt/([a-zA-Z])(/.*)?$'
+    match = re.match(wsl_path_pattern, filename)
-    return result.stdout.strip()
+    if match:
+        # Extract the drive letter and the rest of the path
+        drive_letter = match.group(1)
+        path_remainder = match.group(2) if match.group(2) else ''
+        # Convert forward slashes to backslashes for Windows
+        path_remainder = path_remainder.replace('/', '\\')
+        # Format the Windows path
+        windows_path = f"{drive_letter}:{path_remainder}"
+        return windows_path
+    if failure_behavior == 'none':
+        return None
+    else:
+        return filename
-def windows_path_to_wsl_path(filename):
+# ...def wsl_path_to_windows_path(...)
+def windows_path_to_wsl_path(filename, failure_behavior='none'):
     r"""
     Converts a Windows path to a WSL path, or returns None if that's not possible.  E.g.
     converts:
@@ -711,25 +744,38 @@ def windows_path_to_wsl_path(filename):
     Args:
         filename (str): filename to convert
+        failure_behavior (str): what to do if the path can't be processed as a Windows path.
+            'none' to return None in this case, 'original' to return the original path.
     Returns:
-        str: WSL equivalent to the Windows path [filename], or [filename] if the current
-        environment is neither Windows nor WSL.
+        str: WSL equivalent to the Windows path [filename]
     """
-    if (not environment_is_wsl()) and (os.name != 'nt'):
-        return filename
+    assert failure_behavior in ('none','original'), \
+        'Unrecognized failure_behavior value {}'.format(failure_behavior)
-    if environment_is_wsl():
-        result = subprocess.run(['wslpath', '-u', filename], text=True, capture_output=True)
-    else:
-        result = subprocess.run(['wsl', 'wslpath', '-u', filename], text=True, capture_output=True)
-    if result.returncode != 0:
-        print('Could not convert path {} from Windows to WSL'.format(filename))
+    filename = filename.replace('\\', '/')
+    # Check whether the path follows a Windows drive letter pattern
+    windows_path_pattern = r'^([a-zA-Z]):(/.*)?$'
+    match = re.match(windows_path_pattern, filename)
+    if match:
+        # Extract the drive letter and the rest of the path
+        drive_letter = match.group(1).lower()  # Convert to lowercase for WSL
+        path_remainder = match.group(2) if match.group(2) else ''
+        # Format the WSL path
+        wsl_path = f"/mnt/{drive_letter}{path_remainder}"
+        return wsl_path
+    if failure_behavior == 'none':
         return None
+    else:
+        return filename
-    return result.stdout.strip()
+# ...def windows_path_to_wsl_path(...)
 def open_file_in_chrome(filename):
     """

megadetector/utils/split_locations_into_train_val.py CHANGED Viewed

@@ -28,7 +28,8 @@ def split_locations_into_train_val(location_to_category_counts,
                                    target_val_fraction=0.15,
                                    category_to_max_allowable_error=None,
                                    category_to_error_weight=None,
-                                   default_max_allowable_error=0.1):
+                                   default_max_allowable_error=0.1,
+                                   require_complete_coverage=True):
     """
     Splits a list of location IDs into training and validation, targeting a specific
     train/val split for each category, but allowing some categories to be tighter or looser
@@ -63,6 +64,8 @@ def split_locations_into_train_val(location_to_category_counts,
         default_max_allowable_error (float, optional): the maximum allowable error for categories not
             present in [category_to_max_allowable_error].  Set to None (or >= 1.0) to disable hard
             constraints for categories not present in [category_to_max_allowable_error]
+        require_complete_coverage (bool, optional): require that every category appear in both train and
+            val
     Returns:
         tuple: A two-element tuple:
@@ -125,7 +128,7 @@ def split_locations_into_train_val(location_to_category_counts,
             category_val_fraction = category_val_count / (category_val_count + category_train_count)
             category_to_val_fraction[category_id] = category_val_fraction
-        # Absolute deviation from the target val fraction for each categorys
+        # Absolute deviation from the target val fraction for each category
         category_errors = {}
         weighted_category_errors = {}
@@ -161,18 +164,28 @@ def split_locations_into_train_val(location_to_category_counts,
         seed_satisfies_hard_constraints = True
         for category in category_to_val_fraction:
-            if category in category_to_max_allowable_error:
+            if category in category_to_max_allowable_error:
                 max_allowable_error = category_to_max_allowable_error[category]
             else:
                 if default_max_allowable_error is None:
                     continue
                 max_allowable_error = default_max_allowable_error
             val_fraction = category_to_val_fraction[category]
+            # If necessary, verify that this category doesn't *only* appear in train or val
+            if require_complete_coverage:
+                if (val_fraction == 0.0) or (val_fraction == 1.0):
+                    seed_satisfies_hard_constraints = False
+                    break
+            # Check whether this category exceeds the hard maximum deviation
             category_error = abs(val_fraction - target_val_fraction)
             if category_error > max_allowable_error:
                 seed_satisfies_hard_constraints = False
                 break
+        # ...for each category
         if seed_satisfies_hard_constraints:
             random_seed_to_weighted_average_error[random_seed] = weighted_average_error