dragon-ml-toolbox 19.10.0__py3-none-any.whl → 19.12.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
@@ -1,8 +1,8 @@
- from pprint import pprint
  from typing import Optional, List, Dict, Union, Literal
  from pathlib import Path
  import re
  import sys
+ import shutil

  from ._script_info import _script_info
  from ._logger import get_logger
@@ -17,7 +17,9 @@ __all__ = [
      "sanitize_filename",
      "list_csv_paths",
      "list_files_by_extension",
-     "list_subdirectories"
+     "list_subdirectories",
+     "clean_directory",
+     "safe_move",
  ]


@@ -436,35 +438,28 @@ def sanitize_filename(filename: str) -> str:
      return sanitized


- def list_csv_paths(directory: Union[str,Path], verbose: bool=True) -> dict[str, Path]:
+ def list_csv_paths(directory: Union[str, Path], verbose: bool = True, raise_on_empty: bool = True) -> dict[str, Path]:
      """
      Lists all `.csv` files in the specified directory and returns a mapping: filenames (without extensions) to their absolute paths.

      Parameters:
          directory (str | Path): Path to the directory containing `.csv` files.
+         verbose (bool): If True, prints found files.
+         raise_on_empty (bool): If True, raises IOError if no files are found.

      Returns:
          (dict[str, Path]): Dictionary mapping {filename: filepath}.
      """
-     dir_path = make_fullpath(directory)
+     # wraps the more general function
+     return list_files_by_extension(directory=directory, extension="csv", verbose=verbose, raise_on_empty=raise_on_empty)

-     csv_paths = list(dir_path.glob("*.csv"))
-     if not csv_paths:
-         _LOGGER.error(f"No CSV files found in directory: {dir_path.name}")
-         raise IOError()
-
-     # make a dictionary of paths and names
-     name_path_dict = {p.stem: p for p in csv_paths}
-
-     if verbose:
-         _LOGGER.info("🗂️ CSV files found:")
-         for name in name_path_dict.keys():
-             print(f"\t{name}")
-
-     return name_path_dict

-
- def list_files_by_extension(directory: Union[str,Path], extension: str, verbose: bool=True) -> dict[str, Path]:
+ def list_files_by_extension(
+     directory: Union[str, Path],
+     extension: str,
+     verbose: bool = True,
+     raise_on_empty: bool = True
+ ) -> dict[str, Path]:
      """
      Lists all files with the specified extension in the given directory and returns a mapping:
      filenames (without extensions) to their absolute paths.
@@ -472,20 +467,29 @@ def list_files_by_extension(directory: Union[str,Path], extension: str, verbose:
      Parameters:
          directory (str | Path): Path to the directory to search in.
          extension (str): File extension to search for (e.g., 'json', 'txt').
+         verbose (bool): If True, logs the files found.
+         raise_on_empty (bool): If True, raises IOError if no matching files are found.

      Returns:
-         (dict[str, Path]): Dictionary mapping {filename: filepath}.
+         (dict[str, Path]): Dictionary mapping {filename: filepath}. Returns empty dict if none found and raise_on_empty is False.
      """
-     dir_path = make_fullpath(directory)
+     dir_path = make_fullpath(directory, enforce="directory")

      # Normalize the extension (remove leading dot if present)
      normalized_ext = extension.lstrip(".").lower()
      pattern = f"*.{normalized_ext}"

      matched_paths = list(dir_path.glob(pattern))
+
      if not matched_paths:
-         _LOGGER.error(f"No '.{normalized_ext}' files found in directory: {dir_path}.")
-         raise IOError()
+         msg = f"No '.{normalized_ext}' files found in directory: {dir_path}."
+         if raise_on_empty:
+             _LOGGER.error(msg)
+             raise IOError()
+         else:
+             if verbose:
+                 _LOGGER.warning(msg)
+             return {}

      name_path_dict = {p.stem: p for p in matched_paths}

@@ -497,13 +501,18 @@ def list_files_by_extension(directory: Union[str,Path], extension: str, verbose:
      return name_path_dict


- def list_subdirectories(root_dir: Union[str,Path], verbose: bool=True) -> dict[str, Path]:
+ def list_subdirectories(
+     root_dir: Union[str, Path],
+     verbose: bool = True,
+     raise_on_empty: bool = True
+ ) -> dict[str, Path]:
      """
      Scans a directory and returns a dictionary of its immediate subdirectories.

      Args:
          root_dir (str | Path): The path to the directory to scan.
          verbose (bool): If True, prints the number of directories found.
+         raise_on_empty (bool): If True, raises IOError if no subdirectories are found.

      Returns:
          dict[str, Path]: A dictionary mapping subdirectory names (str) to their full Path objects.
@@ -513,8 +522,14 @@ def list_subdirectories(root_dir: Union[str,Path], verbose: bool=True) -> dict[s
      directories = [p.resolve() for p in root_path.iterdir() if p.is_dir()]

      if len(directories) < 1:
-         _LOGGER.error(f"No subdirectories found inside '{root_path}'")
-         raise IOError()
+         msg = f"No subdirectories found inside '{root_path}'"
+         if raise_on_empty:
+             _LOGGER.error(msg)
+             raise IOError()
+         else:
+             if verbose:
+                 _LOGGER.warning(msg)
+             return {}

      if verbose:
          count = len(directories)
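All three listing helpers (list_csv_paths, list_files_by_extension, list_subdirectories) now share the same failure contract: by default an empty result is logged and raised as IOError, while raise_on_empty=False downgrades it to a warning and an empty dict. A minimal usage sketch against the public re-exports in ml_tools.path_manager; the "./experiments" path is illustrative:

    from ml_tools.path_manager import list_files_by_extension, list_subdirectories

    # Default behavior: no matches -> an error is logged and IOError is raised.
    configs = list_files_by_extension("./experiments", extension="json")

    # Tolerant mode: no matches -> {} is returned (with a warning when verbose), so callers can branch.
    runs = list_subdirectories("./experiments", verbose=True, raise_on_empty=False)
    if not runs:
        print("No experiment folders yet.")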
@@ -529,5 +544,112 @@ def list_subdirectories(root_dir: Union[str,Path], verbose: bool=True) -> dict[s
      return dir_map


+ def clean_directory(directory: Union[str, Path], verbose: bool = False) -> None:
+     """
+     ⚠️ DANGER: DESTRUCTIVE OPERATION ⚠️
+
+     Deletes all files and subdirectories inside the specified directory. It is designed to empty a folder, not delete the folder itself.
+
+     Safety: It skips hidden files and directories (those starting with a period '.'). This works for macOS/Linux hidden files and dot-config folders on Windows.
+
+     Args:
+         directory (str | Path): The directory path to clean.
+         verbose (bool): If True, prints the name of each top-level item deleted.
+     """
+     target_dir = make_fullpath(directory, enforce="directory")
+
+     if verbose:
+         _LOGGER.warning(f"Starting cleanup of directory: {target_dir}")
+
+     for item in target_dir.iterdir():
+         # Safety Check: Skip hidden files/dirs
+         if item.name.startswith("."):
+             continue
+
+         try:
+             if item.is_file() or item.is_symlink():
+                 item.unlink()
+                 if verbose:
+                     print(f" 🗑️ Deleted file: {item.name}")
+             elif item.is_dir():
+                 shutil.rmtree(item)
+                 if verbose:
+                     print(f" 🗑️ Deleted directory: {item.name}")
+         except Exception as e:
+             _LOGGER.warning(f"Failed to delete item '{item.name}': {e}")
+             continue
+
+
+ def safe_move(
+     source: Union[str, Path],
+     final_destination: Union[str, Path],
+     rename: Optional[str] = None,
+     overwrite: bool = False
+ ) -> Path:
+     """
+     Moves a file or directory to a destination directory with safety checks.
+
+     Features:
+     - Supports optional renaming (sanitized automatically).
+     - PRESERVES file extensions during renaming (cannot be modified).
+     - Prevents accidental overwrites unless explicit.
+
+     Args:
+         source (str | Path): The file or directory to move.
+         final_destination (str | Path): The destination DIRECTORY where the item will be moved. It will be created if it does not exist.
+         rename (Optional[str]): If provided, the moved item will be renamed to this. Note: For files, the extension is strictly preserved.
+         overwrite (bool): If True, overwrites the destination path if it exists.
+
+     Returns:
+         Path: The new absolute path of the moved item.
+     """
+     # 1. Validation and Setup
+     src_path = make_fullpath(source, make=False)
+
+     # Ensure destination directory exists
+     dest_dir_path = make_fullpath(final_destination, make=True, enforce="directory")
+
+     # 2. Determine Target Name
+     if rename:
+         sanitized_name = sanitize_filename(rename)
+         if src_path.is_file():
+             # Strict Extension Preservation
+             final_name = f"{sanitized_name}{src_path.suffix}"
+         else:
+             final_name = sanitized_name
+     else:
+         final_name = src_path.name
+
+     final_path = dest_dir_path / final_name
+
+     # 3. Safety Checks (Collision Detection)
+     if final_path.exists():
+         if not overwrite:
+             _LOGGER.error(f"Destination already exists: '{final_path}'. Use overwrite=True to force.")
+             raise FileExistsError()
+
+         # Smart Overwrite Handling
+         if final_path.is_dir():
+             if src_path.is_file():
+                 _LOGGER.error(f"Cannot overwrite directory '{final_path}' with file '{src_path}'")
+                 raise IsADirectoryError()
+             # If overwriting a directory, we must remove the old one first to avoid nesting/errors
+             shutil.rmtree(final_path)
+         else:
+             # Destination is a file
+             if src_path.is_dir():
+                 _LOGGER.error(f"Cannot overwrite file '{final_path}' with directory '{src_path}'")
+                 raise FileExistsError()
+             final_path.unlink()
+
+     # 4. Perform Move
+     try:
+         shutil.move(str(src_path), str(final_path))
+         return final_path
+     except Exception as e:
+         _LOGGER.exception(f"Failed to move '{src_path}' to '{final_path}'")
+         raise e
+
+
  def info():
      _script_info(__all__)
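Together, safe_move and clean_directory cover a "stage the result, then reset the workspace" flow. A sketch assuming the public re-exports in ml_tools.path_manager; the paths and the rename value are illustrative, and the exact renamed filename depends on sanitize_filename:

    from ml_tools.path_manager import safe_move, clean_directory

    # Move a finished report into an archive directory (created if missing), renaming it.
    # The original ".csv" extension is preserved regardless of the rename value.
    archived = safe_move("results/run_01.csv", "archive/2024", rename="baseline run")
    print(archived)  # e.g. archive/2024/baseline_run.csv, subject to sanitize_filename

    # Empty the scratch area afterwards; hidden entries such as ".gitkeep" are skipped.
    clean_directory("results/scratch", verbose=True)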
@@ -166,8 +166,12 @@ def load_dataframe_greedy(directory: Union[str, Path],
      dir_path = make_fullpath(directory, enforce="directory")

      # list all csv files and grab one (should be the only one)
-     csv_dict = list_csv_paths(directory=dir_path, verbose=False)
+     csv_dict = list_csv_paths(directory=dir_path, verbose=False, raise_on_empty=True)

+     # explicitly check that there is only one csv file
+     if len(csv_dict) > 1:
+         _LOGGER.warning(f"Multiple CSV files found in '{dir_path}'. Only one will be loaded.")
+
      for df_path in csv_dict.values():
          df , _df_name = load_dataframe(df_path=df_path,
                                         use_columns=use_columns,
@@ -260,7 +264,7 @@ def yield_dataframes_from_dir(datasets_dir: Union[str,Path], verbose: bool=True)
      - Output is streamed via a generator to support lazy loading of multiple datasets.
      """
      datasets_path = make_fullpath(datasets_dir)
-     files_dict = list_csv_paths(datasets_path, verbose=verbose)
+     files_dict = list_csv_paths(datasets_path, verbose=verbose, raise_on_empty=True)
      for df_name, df_path in files_dict.items():
          df: pd.DataFrame
          df, _ = load_dataframe(df_path, kind="pandas", verbose=verbose) # type: ignore
ml_tools/keys.py CHANGED
@@ -2,10 +2,12 @@ from ._core._keys import (
      PyTorchInferenceKeys as InferenceKeys,
      _CheckpointCallbackKeys as CheckpointCallbackKeys,
      _FinalizedFileKeys as FinalizedFileKeys,
+     _PublicTaskKeys as TaskKeys,
  )

  __all__ = [
      "InferenceKeys",
      "CheckpointCallbackKeys",
      "FinalizedFileKeys",
+     "TaskKeys",
  ]
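The TaskKeys alias promotes _PublicTaskKeys into the stable import surface of ml_tools.keys. A quick way to inspect it after upgrading (the attributes defined on TaskKeys live in _core/_keys.py and are not shown in this diff):

    from ml_tools.keys import TaskKeys

    # List the public task-key names without importing the private _core module.
    print([name for name in vars(TaskKeys) if not name.startswith("_")])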
ml_tools/path_manager.py CHANGED
@@ -5,6 +5,8 @@ from ._core._path_manager import (
      list_csv_paths,
      list_files_by_extension,
      list_subdirectories,
+     clean_directory,
+     safe_move,
      info
  )

@@ -14,5 +16,7 @@ __all__ = [
      "sanitize_filename",
      "list_csv_paths",
      "list_files_by_extension",
-     "list_subdirectories"
+     "list_subdirectories",
+     "clean_directory",
+     "safe_move",
  ]
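path_manager.py remains a thin re-export shim, so downstream code keeps importing from the stable public path while the implementation lives in _core._path_manager. A small post-upgrade sanity check; the output format of info() comes from _script_info and is not shown in this diff:

    from ml_tools import path_manager

    # The new helpers are part of the public surface.
    assert "clean_directory" in path_manager.__all__
    assert "safe_move" in path_manager.__all__

    # Prints the module's public names via _script_info.
    path_manager.info()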