dragon-ml-toolbox 19.14.0__py3-none-any.whl → 20.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dragon_ml_toolbox-19.14.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/METADATA +29 -46
- dragon_ml_toolbox-20.0.0.dist-info/RECORD +178 -0
- ml_tools/{ETL_cleaning.py → ETL_cleaning/__init__.py} +13 -5
- ml_tools/ETL_cleaning/_basic_clean.py +351 -0
- ml_tools/ETL_cleaning/_clean_tools.py +128 -0
- ml_tools/ETL_cleaning/_dragon_cleaner.py +245 -0
- ml_tools/ETL_cleaning/_imprimir.py +13 -0
- ml_tools/{ETL_engineering.py → ETL_engineering/__init__.py} +8 -4
- ml_tools/ETL_engineering/_dragon_engineering.py +261 -0
- ml_tools/ETL_engineering/_imprimir.py +24 -0
- ml_tools/{_core/_ETL_engineering.py → ETL_engineering/_transforms.py} +14 -267
- ml_tools/{_core → GUI_tools}/_GUI_tools.py +37 -40
- ml_tools/{GUI_tools.py → GUI_tools/__init__.py} +7 -5
- ml_tools/GUI_tools/_imprimir.py +12 -0
- ml_tools/IO_tools/_IO_loggers.py +235 -0
- ml_tools/IO_tools/_IO_save_load.py +151 -0
- ml_tools/IO_tools/_IO_utils.py +140 -0
- ml_tools/{IO_tools.py → IO_tools/__init__.py} +13 -5
- ml_tools/IO_tools/_imprimir.py +14 -0
- ml_tools/MICE/_MICE_imputation.py +132 -0
- ml_tools/{MICE_imputation.py → MICE/__init__.py} +6 -7
- ml_tools/{_core/_MICE_imputation.py → MICE/_dragon_mice.py} +243 -322
- ml_tools/MICE/_imprimir.py +11 -0
- ml_tools/{ML_callbacks.py → ML_callbacks/__init__.py} +12 -4
- ml_tools/ML_callbacks/_base.py +101 -0
- ml_tools/ML_callbacks/_checkpoint.py +232 -0
- ml_tools/ML_callbacks/_early_stop.py +208 -0
- ml_tools/ML_callbacks/_imprimir.py +12 -0
- ml_tools/ML_callbacks/_scheduler.py +197 -0
- ml_tools/{ML_chaining_utilities.py → ML_chain/__init__.py} +8 -3
- ml_tools/{_core/_ML_chaining_utilities.py → ML_chain/_chaining_tools.py} +5 -129
- ml_tools/ML_chain/_dragon_chain.py +140 -0
- ml_tools/ML_chain/_imprimir.py +11 -0
- ml_tools/ML_configuration/__init__.py +90 -0
- ml_tools/ML_configuration/_base_model_config.py +69 -0
- ml_tools/ML_configuration/_finalize.py +366 -0
- ml_tools/ML_configuration/_imprimir.py +47 -0
- ml_tools/ML_configuration/_metrics.py +593 -0
- ml_tools/ML_configuration/_models.py +206 -0
- ml_tools/ML_configuration/_training.py +124 -0
- ml_tools/ML_datasetmaster/__init__.py +28 -0
- ml_tools/ML_datasetmaster/_base_datasetmaster.py +337 -0
- ml_tools/{_core/_ML_datasetmaster.py → ML_datasetmaster/_datasetmaster.py} +9 -329
- ml_tools/ML_datasetmaster/_imprimir.py +15 -0
- ml_tools/{_core/_ML_sequence_datasetmaster.py → ML_datasetmaster/_sequence_datasetmaster.py} +13 -15
- ml_tools/{_core/_ML_vision_datasetmaster.py → ML_datasetmaster/_vision_datasetmaster.py} +63 -65
- ml_tools/ML_evaluation/__init__.py +53 -0
- ml_tools/ML_evaluation/_classification.py +629 -0
- ml_tools/ML_evaluation/_feature_importance.py +409 -0
- ml_tools/ML_evaluation/_imprimir.py +25 -0
- ml_tools/ML_evaluation/_loss.py +92 -0
- ml_tools/ML_evaluation/_regression.py +273 -0
- ml_tools/{_core/_ML_sequence_evaluation.py → ML_evaluation/_sequence.py} +8 -11
- ml_tools/{_core/_ML_vision_evaluation.py → ML_evaluation/_vision.py} +12 -17
- ml_tools/{_core → ML_evaluation_captum}/_ML_evaluation_captum.py +11 -38
- ml_tools/{ML_evaluation_captum.py → ML_evaluation_captum/__init__.py} +6 -4
- ml_tools/ML_evaluation_captum/_imprimir.py +10 -0
- ml_tools/{_core → ML_finalize_handler}/_ML_finalize_handler.py +3 -7
- ml_tools/ML_finalize_handler/__init__.py +10 -0
- ml_tools/ML_finalize_handler/_imprimir.py +8 -0
- ml_tools/ML_inference/__init__.py +22 -0
- ml_tools/ML_inference/_base_inference.py +166 -0
- ml_tools/{_core/_ML_chaining_inference.py → ML_inference/_chain_inference.py} +14 -17
- ml_tools/ML_inference/_dragon_inference.py +332 -0
- ml_tools/ML_inference/_imprimir.py +11 -0
- ml_tools/ML_inference/_multi_inference.py +180 -0
- ml_tools/ML_inference_sequence/__init__.py +10 -0
- ml_tools/ML_inference_sequence/_imprimir.py +8 -0
- ml_tools/{_core/_ML_sequence_inference.py → ML_inference_sequence/_sequence_inference.py} +11 -15
- ml_tools/ML_inference_vision/__init__.py +10 -0
- ml_tools/ML_inference_vision/_imprimir.py +8 -0
- ml_tools/{_core/_ML_vision_inference.py → ML_inference_vision/_vision_inference.py} +15 -19
- ml_tools/ML_models/__init__.py +32 -0
- ml_tools/{_core/_ML_models_advanced.py → ML_models/_advanced_models.py} +22 -18
- ml_tools/ML_models/_base_mlp_attention.py +198 -0
- ml_tools/{_core/_models_advanced_base.py → ML_models/_base_save_load.py} +73 -49
- ml_tools/ML_models/_dragon_tabular.py +248 -0
- ml_tools/ML_models/_imprimir.py +18 -0
- ml_tools/ML_models/_mlp_attention.py +134 -0
- ml_tools/{_core → ML_models}/_models_advanced_helpers.py +13 -13
- ml_tools/ML_models_sequence/__init__.py +10 -0
- ml_tools/ML_models_sequence/_imprimir.py +8 -0
- ml_tools/{_core/_ML_sequence_models.py → ML_models_sequence/_sequence_models.py} +5 -8
- ml_tools/ML_models_vision/__init__.py +29 -0
- ml_tools/ML_models_vision/_base_wrapper.py +254 -0
- ml_tools/ML_models_vision/_image_classification.py +182 -0
- ml_tools/ML_models_vision/_image_segmentation.py +108 -0
- ml_tools/ML_models_vision/_imprimir.py +16 -0
- ml_tools/ML_models_vision/_object_detection.py +135 -0
- ml_tools/ML_optimization/__init__.py +21 -0
- ml_tools/ML_optimization/_imprimir.py +13 -0
- ml_tools/{_core/_ML_optimization_pareto.py → ML_optimization/_multi_dragon.py} +18 -24
- ml_tools/ML_optimization/_single_dragon.py +203 -0
- ml_tools/{_core/_ML_optimization.py → ML_optimization/_single_manual.py} +75 -213
- ml_tools/{_core → ML_scaler}/_ML_scaler.py +8 -11
- ml_tools/ML_scaler/__init__.py +10 -0
- ml_tools/ML_scaler/_imprimir.py +8 -0
- ml_tools/ML_trainer/__init__.py +20 -0
- ml_tools/ML_trainer/_base_trainer.py +297 -0
- ml_tools/ML_trainer/_dragon_detection_trainer.py +402 -0
- ml_tools/ML_trainer/_dragon_sequence_trainer.py +540 -0
- ml_tools/ML_trainer/_dragon_trainer.py +1160 -0
- ml_tools/ML_trainer/_imprimir.py +10 -0
- ml_tools/{ML_utilities.py → ML_utilities/__init__.py} +14 -6
- ml_tools/ML_utilities/_artifact_finder.py +382 -0
- ml_tools/ML_utilities/_imprimir.py +16 -0
- ml_tools/ML_utilities/_inspection.py +325 -0
- ml_tools/ML_utilities/_train_tools.py +205 -0
- ml_tools/{ML_vision_transformers.py → ML_vision_transformers/__init__.py} +9 -6
- ml_tools/{_core/_ML_vision_transformers.py → ML_vision_transformers/_core_transforms.py} +11 -155
- ml_tools/ML_vision_transformers/_imprimir.py +14 -0
- ml_tools/ML_vision_transformers/_offline_augmentation.py +159 -0
- ml_tools/{_core/_PSO_optimization.py → PSO_optimization/_PSO.py} +58 -15
- ml_tools/{PSO_optimization.py → PSO_optimization/__init__.py} +5 -3
- ml_tools/PSO_optimization/_imprimir.py +10 -0
- ml_tools/SQL/__init__.py +7 -0
- ml_tools/{_core/_SQL.py → SQL/_dragon_SQL.py} +7 -11
- ml_tools/SQL/_imprimir.py +8 -0
- ml_tools/{_core → VIF}/_VIF_factor.py +5 -8
- ml_tools/{VIF_factor.py → VIF/__init__.py} +4 -2
- ml_tools/VIF/_imprimir.py +10 -0
- ml_tools/_core/__init__.py +7 -1
- ml_tools/_core/_logger.py +8 -18
- ml_tools/_core/_schema_load_ops.py +43 -0
- ml_tools/_core/_script_info.py +2 -2
- ml_tools/{data_exploration.py → data_exploration/__init__.py} +32 -16
- ml_tools/data_exploration/_analysis.py +214 -0
- ml_tools/data_exploration/_cleaning.py +566 -0
- ml_tools/data_exploration/_features.py +583 -0
- ml_tools/data_exploration/_imprimir.py +32 -0
- ml_tools/data_exploration/_plotting.py +487 -0
- ml_tools/data_exploration/_schema_ops.py +176 -0
- ml_tools/{ensemble_evaluation.py → ensemble_evaluation/__init__.py} +6 -4
- ml_tools/{_core → ensemble_evaluation}/_ensemble_evaluation.py +3 -7
- ml_tools/ensemble_evaluation/_imprimir.py +14 -0
- ml_tools/{ensemble_inference.py → ensemble_inference/__init__.py} +5 -3
- ml_tools/{_core → ensemble_inference}/_ensemble_inference.py +15 -18
- ml_tools/ensemble_inference/_imprimir.py +9 -0
- ml_tools/{ensemble_learning.py → ensemble_learning/__init__.py} +4 -6
- ml_tools/{_core → ensemble_learning}/_ensemble_learning.py +7 -10
- ml_tools/ensemble_learning/_imprimir.py +10 -0
- ml_tools/{excel_handler.py → excel_handler/__init__.py} +5 -3
- ml_tools/{_core → excel_handler}/_excel_handler.py +6 -10
- ml_tools/excel_handler/_imprimir.py +13 -0
- ml_tools/{keys.py → keys/__init__.py} +4 -1
- ml_tools/keys/_imprimir.py +11 -0
- ml_tools/{_core → keys}/_keys.py +2 -0
- ml_tools/{math_utilities.py → math_utilities/__init__.py} +5 -2
- ml_tools/math_utilities/_imprimir.py +11 -0
- ml_tools/{_core → math_utilities}/_math_utilities.py +1 -5
- ml_tools/{optimization_tools.py → optimization_tools/__init__.py} +9 -4
- ml_tools/optimization_tools/_imprimir.py +13 -0
- ml_tools/optimization_tools/_optimization_bounds.py +236 -0
- ml_tools/optimization_tools/_optimization_plots.py +218 -0
- ml_tools/{path_manager.py → path_manager/__init__.py} +6 -3
- ml_tools/{_core/_path_manager.py → path_manager/_dragonmanager.py} +11 -347
- ml_tools/path_manager/_imprimir.py +15 -0
- ml_tools/path_manager/_path_tools.py +346 -0
- ml_tools/plot_fonts/__init__.py +8 -0
- ml_tools/plot_fonts/_imprimir.py +8 -0
- ml_tools/{_core → plot_fonts}/_plot_fonts.py +2 -5
- ml_tools/schema/__init__.py +15 -0
- ml_tools/schema/_feature_schema.py +223 -0
- ml_tools/schema/_gui_schema.py +191 -0
- ml_tools/schema/_imprimir.py +10 -0
- ml_tools/{serde.py → serde/__init__.py} +4 -2
- ml_tools/serde/_imprimir.py +10 -0
- ml_tools/{_core → serde}/_serde.py +3 -8
- ml_tools/{utilities.py → utilities/__init__.py} +11 -6
- ml_tools/utilities/_imprimir.py +18 -0
- ml_tools/{_core/_utilities.py → utilities/_utility_save_load.py} +13 -190
- ml_tools/utilities/_utility_tools.py +192 -0
- dragon_ml_toolbox-19.14.0.dist-info/RECORD +0 -111
- ml_tools/ML_chaining_inference.py +0 -8
- ml_tools/ML_configuration.py +0 -86
- ml_tools/ML_configuration_pytab.py +0 -14
- ml_tools/ML_datasetmaster.py +0 -10
- ml_tools/ML_evaluation.py +0 -16
- ml_tools/ML_evaluation_multi.py +0 -12
- ml_tools/ML_finalize_handler.py +0 -8
- ml_tools/ML_inference.py +0 -12
- ml_tools/ML_models.py +0 -14
- ml_tools/ML_models_advanced.py +0 -14
- ml_tools/ML_models_pytab.py +0 -14
- ml_tools/ML_optimization.py +0 -14
- ml_tools/ML_optimization_pareto.py +0 -8
- ml_tools/ML_scaler.py +0 -8
- ml_tools/ML_sequence_datasetmaster.py +0 -8
- ml_tools/ML_sequence_evaluation.py +0 -10
- ml_tools/ML_sequence_inference.py +0 -8
- ml_tools/ML_sequence_models.py +0 -8
- ml_tools/ML_trainer.py +0 -12
- ml_tools/ML_vision_datasetmaster.py +0 -12
- ml_tools/ML_vision_evaluation.py +0 -10
- ml_tools/ML_vision_inference.py +0 -8
- ml_tools/ML_vision_models.py +0 -18
- ml_tools/SQL.py +0 -8
- ml_tools/_core/_ETL_cleaning.py +0 -694
- ml_tools/_core/_IO_tools.py +0 -498
- ml_tools/_core/_ML_callbacks.py +0 -702
- ml_tools/_core/_ML_configuration.py +0 -1332
- ml_tools/_core/_ML_configuration_pytab.py +0 -102
- ml_tools/_core/_ML_evaluation.py +0 -867
- ml_tools/_core/_ML_evaluation_multi.py +0 -544
- ml_tools/_core/_ML_inference.py +0 -646
- ml_tools/_core/_ML_models.py +0 -668
- ml_tools/_core/_ML_models_pytab.py +0 -693
- ml_tools/_core/_ML_trainer.py +0 -2323
- ml_tools/_core/_ML_utilities.py +0 -886
- ml_tools/_core/_ML_vision_models.py +0 -644
- ml_tools/_core/_data_exploration.py +0 -1909
- ml_tools/_core/_optimization_tools.py +0 -493
- ml_tools/_core/_schema.py +0 -359
- ml_tools/plot_fonts.py +0 -8
- ml_tools/schema.py +0 -12
- {dragon_ml_toolbox-19.14.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/WHEEL +0 -0
- {dragon_ml_toolbox-19.14.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/licenses/LICENSE +0 -0
- {dragon_ml_toolbox-19.14.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +0 -0
- {dragon_ml_toolbox-19.14.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
from typing import Union, Optional
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
import pandas as pd
|
|
4
|
+
import matplotlib.pyplot as plt
|
|
5
|
+
import seaborn as sns
|
|
6
|
+
|
|
7
|
+
from ..utilities import yield_dataframes_from_dir
|
|
8
|
+
|
|
9
|
+
from ..path_manager import sanitize_filename, make_fullpath, list_csv_paths
|
|
10
|
+
from .._core import get_logger
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
_LOGGER = get_logger("Optimization Plots")
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
__all__ = [
|
|
17
|
+
"plot_optimal_feature_distributions",
|
|
18
|
+
"plot_optimal_feature_distributions_from_dataframe",
|
|
19
|
+
]
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def plot_optimal_feature_distributions(results_dir: Union[str, Path],
                                       verbose: bool=False,
                                       target_columns: Optional[list[str]] = None):
    """
    Analyzes optimization results and plots the distribution of optimal values.

    This function is compatible with mixed-type CSVs (strings for
    categorical features, numbers for continuous). It automatically
    detects the data type for each feature and generates:

    - A Bar Plot for categorical (string) features.
    - A KDE Plot for continuous (numeric) features.

    Plots are saved in a 'DistributionPlots' subdirectory inside the source directory.

    Parameters
    ----------
    results_dir : str | Path
        The path to the directory containing the optimization result CSV files.
    verbose : bool, optional
        If True, logs details about which plot type is chosen for each feature.
    target_columns : list[str] | None
        A list of target column names to explicitly exclude from plotting.
        If None, it defaults to excluding only the last column (assumed as the target).
    """
    # Check results_dir and create output path.
    # enforce="directory" added for consistency with
    # plot_optimal_feature_distributions_from_dataframe.
    results_path = make_fullpath(results_dir, enforce="directory")
    output_path = make_fullpath(results_path / "DistributionPlots", make=True, enforce="directory")

    # Fail fast: raise if the directory contains no csv files.
    list_csv_paths(results_path, verbose=False, raise_on_empty=True)

    # --- Data Loading and Preparation ---
    _LOGGER.debug(f"📁 Starting analysis from results in: '{results_dir}'")

    data_to_plot = []
    for df, df_name in yield_dataframes_from_dir(results_path, verbose=True):
        if df.shape[1] < 2:
            _LOGGER.warning(f"Skipping '{df_name}': must have at least 2 columns (feature + target).")
            continue

        # --- Column selection logic ---
        if target_columns:
            # 1. Explicitly drop known targets to isolate features
            existing_targets = [c for c in target_columns if c in df.columns]

            if not existing_targets:
                # Previously this case silently kept every column (including the
                # real target) as a "feature"; surface the mismatch instead.
                _LOGGER.warning(f"'{df_name}': none of the provided target_columns were found; all columns will be treated as features.")

            features_df = df.drop(columns=existing_targets)

            if features_df.empty:
                _LOGGER.warning(f"Skipping '{df_name}': All columns were dropped based on target_columns list.")
                continue
        else:
            # 2. Fallback: Assume the last column is the only target
            features_df = df.iloc[:, :-1]

        # 3. Melt the filtered dataframe to long format: (feature, value)
        melted_df = features_df.melt(var_name='feature', value_name='value')

        # Set target as the filename (or joined target names) to differentiate sources
        melted_df['target'] = '\n'.join(target_columns) if target_columns else df_name
        data_to_plot.append(melted_df)

    if not data_to_plot:
        _LOGGER.error("No valid data to plot after processing all CSVs.")
        return

    long_df = pd.concat(data_to_plot, ignore_index=True)

    # --- Delegate to Helper ---
    _generate_and_save_feature_plots(long_df, output_path, verbose)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def plot_optimal_feature_distributions_from_dataframe(dataframe: pd.DataFrame,
                                                      save_dir: Union[str, Path],
                                                      verbose: bool=False,
                                                      target_columns: Optional[list[str]] = None):
    """
    Analyzes a single dataframe of optimization results and plots the distribution of optimal values.

    Works with mixed-type data (strings for categorical features, numbers for
    continuous ones): each feature's type is detected automatically and plotted as

    - a Bar Plot when the feature is categorical (string-based), or
    - a KDE Plot when the feature is continuous (numeric).

    All figures are written to a 'DistributionPlots' subdirectory inside `save_dir`.

    Parameters
    ----------
    dataframe : pd.DataFrame
        The dataframe containing the optimization results (features + target/s).
    save_dir : str | Path
        The directory where the 'DistributionPlots' folder will be created.
    verbose : bool, optional
        If True, logs details about which plot type is chosen for each feature.
    target_columns : list[str] | None
        A list of target column names to explicitly exclude from plotting.
        If None, it defaults to excluding only the last column (assumed as the target).
    """
    # Resolve the destination folders, creating them on demand.
    root_path = make_fullpath(save_dir, make=True, enforce="directory")
    output_path = make_fullpath(root_path / "DistributionPlots", make=True, enforce="directory")

    _LOGGER.debug(f"📁 Starting analysis from provided DataFrame. Output: '{output_path}'")

    # Guard clauses for degenerate inputs.
    if dataframe.empty:
        _LOGGER.error("Provided dataframe is empty.")
        return

    if dataframe.shape[1] < 2:
        _LOGGER.warning("DataFrame has fewer than 2 columns. Expecting at least one feature and one target.")

    # --- Separate features from target(s) ---
    if not target_columns:
        # No explicit targets: treat the final column as the single target.
        feature_frame = dataframe.iloc[:, :-1]
        hue_label = "Optimization Result"
    else:
        # Drop only the requested targets that actually exist in the frame.
        present = [name for name in target_columns if name in dataframe.columns]
        feature_frame = dataframe.drop(columns=present)
        hue_label = '\n'.join(target_columns)

    if feature_frame.empty:
        _LOGGER.warning("Skipping plotting: All columns were dropped based on target_columns list.")
        return

    # Reshape to long format and tag every row with one shared target label.
    long_df = feature_frame.melt(var_name='feature', value_name='value')
    long_df['target'] = hue_label

    # Hand off to the shared plotting helper.
    _generate_and_save_feature_plots(long_df, output_path, verbose)
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def _generate_and_save_feature_plots(long_df: pd.DataFrame, output_path: Path, verbose: bool) -> None:
    """
    Private helper: iterates over a melted DataFrame (columns: feature, value, target)
    and generates/saves the appropriate plot (Bar or KDE) for each feature.

    Parameters
    ----------
    long_df : pd.DataFrame
        Long-format data with 'feature', 'value' and 'target' columns.
    output_path : Path
        Directory where one SVG file per feature is written.
    verbose : bool
        If True, prints which plot type was chosen for each feature.
    """
    features = long_df['feature'].unique()

    _LOGGER.info(f"📊 Found data for {len(features)} features. Generating plots...")

    for feature_name in features:
        plt.figure(figsize=(12, 7))
        # try/finally guarantees the figure is closed even if a plotting call
        # raises, preventing matplotlib figure leaks across many features.
        try:
            # .copy() to ensure we are working with a distinct object
            feature_df = long_df[long_df['feature'] == feature_name].copy()

            # --- Type-checking logic ---
            feature_df['numeric_value'] = pd.to_numeric(feature_df['value'], errors='coerce')

            # If *any* value failed conversion (is NaN), treat it as categorical.
            if feature_df['numeric_value'].isna().any():
                # --- PLOT 1: CATEGORICAL (String-based) ---
                if verbose:
                    print(f"  Plotting '{feature_name}' as categorical (bar plot).")

                # Calculate percentages for a clean bar plot
                norm_df = (feature_df.groupby('target')['value']
                           .value_counts(normalize=True)
                           .mul(100)
                           .rename('percent')
                           .reset_index())

                ax = sns.barplot(data=norm_df, x='value', y='percent', hue='target')
                plt.ylabel("Frequency (%)", fontsize=12)
                ax.set_ylim(0, 100)

                # always rotate x-ticks for categorical clarity
                plt.xticks(rotation=45, ha='right')
            else:
                # --- PLOT 2: CONTINUOUS (Numeric-based) ---
                if verbose:
                    print(f"  Plotting '{feature_name}' as continuous (KDE plot).")

                ax = sns.kdeplot(data=feature_df, x='numeric_value', hue='target',
                                 fill=True, alpha=0.1, warn_singular=False)

                plt.xlabel("Feature Value", fontsize=12)
                plt.ylabel("Density", fontsize=12)

            # --- Common settings for both plot types ---
            plt.title(f"Optimal Value Distribution for '{feature_name}'", fontsize=16)
            plt.grid(axis='y', alpha=0.5, linestyle='--')

            legend = ax.get_legend()
            if legend:
                legend.set_title('Target')

            sanitized_feature_name = sanitize_filename(feature_name)
            plot_filename = output_path / f"Distribution_{sanitized_feature_name}.svg"
            plt.savefig(plot_filename, bbox_inches='tight')
        finally:
            plt.close()

    _LOGGER.info(f"All plots saved successfully to: '{output_path}'")
|
|
@@ -1,5 +1,6 @@
|
|
|
1
|
-
from .
|
|
2
|
-
|
|
1
|
+
from ._dragonmanager import DragonPathManager
|
|
2
|
+
|
|
3
|
+
from ._path_tools import (
|
|
3
4
|
make_fullpath,
|
|
4
5
|
sanitize_filename,
|
|
5
6
|
list_csv_paths,
|
|
@@ -7,9 +8,11 @@ from ._core._path_manager import (
|
|
|
7
8
|
list_subdirectories,
|
|
8
9
|
clean_directory,
|
|
9
10
|
safe_move,
|
|
10
|
-
info
|
|
11
11
|
)
|
|
12
12
|
|
|
13
|
+
from ._imprimir import info
|
|
14
|
+
|
|
15
|
+
|
|
13
16
|
__all__ = [
|
|
14
17
|
"DragonPathManager",
|
|
15
18
|
"make_fullpath",
|
|
@@ -1,25 +1,17 @@
|
|
|
1
|
-
from typing import Optional,
|
|
1
|
+
from typing import Optional, Union
|
|
2
2
|
from pathlib import Path
|
|
3
|
-
import re
|
|
4
3
|
import sys
|
|
5
|
-
import shutil
|
|
6
4
|
|
|
7
|
-
from
|
|
8
|
-
from ._logger import get_logger
|
|
5
|
+
from .._core import get_logger
|
|
9
6
|
|
|
7
|
+
from ._path_tools import sanitize_filename
|
|
10
8
|
|
|
11
|
-
|
|
9
|
+
|
|
10
|
+
_LOGGER = get_logger("DragonPathManager")
|
|
12
11
|
|
|
13
12
|
|
|
14
13
|
__all__ = [
|
|
15
|
-
"DragonPathManager"
|
|
16
|
-
"make_fullpath",
|
|
17
|
-
"sanitize_filename",
|
|
18
|
-
"list_csv_paths",
|
|
19
|
-
"list_files_by_extension",
|
|
20
|
-
"list_subdirectories",
|
|
21
|
-
"clean_directory",
|
|
22
|
-
"safe_move",
|
|
14
|
+
"DragonPathManager"
|
|
23
15
|
]
|
|
24
16
|
|
|
25
17
|
|
|
@@ -37,7 +29,7 @@ class DragonPathManager:
|
|
|
37
29
|
def __init__(
|
|
38
30
|
self,
|
|
39
31
|
anchor_file: str,
|
|
40
|
-
base_directories: Optional[
|
|
32
|
+
base_directories: Optional[list[str]] = None,
|
|
41
33
|
strict_to_root: bool = True
|
|
42
34
|
):
|
|
43
35
|
"""
|
|
@@ -61,7 +53,7 @@ class DragonPathManager:
|
|
|
61
53
|
resolved_anchor_path = Path(anchor_file).resolve()
|
|
62
54
|
self._package_name = resolved_anchor_path.parent.name
|
|
63
55
|
self._is_bundled, bundle_root = self._get_bundle_root()
|
|
64
|
-
self._paths:
|
|
56
|
+
self._paths: dict[str, Path] = {}
|
|
65
57
|
self._strict_to_root = strict_to_root
|
|
66
58
|
|
|
67
59
|
if self._is_bundled:
|
|
@@ -111,12 +103,12 @@ class DragonPathManager:
|
|
|
111
103
|
_LOGGER.error(f"Path key '{key}' cannot start with underscores.")
|
|
112
104
|
raise ValueError()
|
|
113
105
|
|
|
114
|
-
def update(self, new_paths:
|
|
106
|
+
def update(self, new_paths: dict[str, Union[str, Path]]) -> None:
|
|
115
107
|
"""
|
|
116
108
|
Adds new paths in the manager.
|
|
117
109
|
|
|
118
110
|
Args:
|
|
119
|
-
new_paths (
|
|
111
|
+
new_paths (dict[str, Union[str, Path]]): A dictionary where keys are
|
|
120
112
|
the identifiers and values are the
|
|
121
113
|
Path objects to store.
|
|
122
114
|
"""
|
|
@@ -135,7 +127,7 @@ class DragonPathManager:
|
|
|
135
127
|
def _sanitize_key(self, key: str):
|
|
136
128
|
return sanitize_filename(key)
|
|
137
129
|
|
|
138
|
-
def make_dirs(self, keys: Optional[
|
|
130
|
+
def make_dirs(self, keys: Optional[list[str]] = None, verbose: bool = False) -> None:
|
|
139
131
|
"""
|
|
140
132
|
Creates directory structures for registered paths in writable locations.
|
|
141
133
|
|
|
@@ -325,331 +317,3 @@ class DragonPathManager:
|
|
|
325
317
|
# Store absolute Path.
|
|
326
318
|
self._paths[sanitized_name] = new_path
|
|
327
319
|
|
|
328
|
-
|
|
329
|
-
def make_fullpath(
|
|
330
|
-
input_path: Union[str, Path],
|
|
331
|
-
make: bool = False,
|
|
332
|
-
verbose: bool = False,
|
|
333
|
-
enforce: Optional[Literal["directory", "file"]] = None
|
|
334
|
-
) -> Path:
|
|
335
|
-
"""
|
|
336
|
-
Resolves a string or Path into an absolute Path, optionally creating it.
|
|
337
|
-
|
|
338
|
-
- If the path exists, it is returned.
|
|
339
|
-
- If the path does not exist and `make=True`, it will:
|
|
340
|
-
- Create the file if the path has a suffix
|
|
341
|
-
- Create the directory if it has no suffix
|
|
342
|
-
- If `make=False` and the path does not exist, an error is raised.
|
|
343
|
-
- If `enforce`, raises an error if the resolved path is not what was enforced.
|
|
344
|
-
- Optionally prints whether the resolved path is a file or directory.
|
|
345
|
-
|
|
346
|
-
Parameters:
|
|
347
|
-
input_path (str | Path):
|
|
348
|
-
Path to resolve.
|
|
349
|
-
make (bool):
|
|
350
|
-
If True, attempt to create file or directory.
|
|
351
|
-
verbose (bool):
|
|
352
|
-
Print classification after resolution.
|
|
353
|
-
enforce ("directory" | "file" | None):
|
|
354
|
-
Raises an error if the resolved path is not what was enforced.
|
|
355
|
-
|
|
356
|
-
Returns:
|
|
357
|
-
Path: Resolved absolute path.
|
|
358
|
-
|
|
359
|
-
Raises:
|
|
360
|
-
ValueError: If the path doesn't exist and can't be created.
|
|
361
|
-
TypeError: If the final path does not match the `enforce` parameter.
|
|
362
|
-
|
|
363
|
-
## 🗒️ Note:
|
|
364
|
-
|
|
365
|
-
Directories with dots will be treated as files.
|
|
366
|
-
|
|
367
|
-
Files without extension will be treated as directories.
|
|
368
|
-
"""
|
|
369
|
-
path = Path(input_path).expanduser()
|
|
370
|
-
|
|
371
|
-
is_file = path.suffix != ""
|
|
372
|
-
|
|
373
|
-
try:
|
|
374
|
-
resolved = path.resolve(strict=True)
|
|
375
|
-
except FileNotFoundError:
|
|
376
|
-
if not make:
|
|
377
|
-
_LOGGER.error(f"Path does not exist: '{path}'.")
|
|
378
|
-
raise FileNotFoundError()
|
|
379
|
-
|
|
380
|
-
try:
|
|
381
|
-
if is_file:
|
|
382
|
-
# Create parent directories first
|
|
383
|
-
path.parent.mkdir(parents=True, exist_ok=True)
|
|
384
|
-
path.touch(exist_ok=False)
|
|
385
|
-
else:
|
|
386
|
-
path.mkdir(parents=True, exist_ok=True)
|
|
387
|
-
resolved = path.resolve(strict=True)
|
|
388
|
-
except Exception:
|
|
389
|
-
_LOGGER.exception(f"Failed to create {'file' if is_file else 'directory'} '{path}'.")
|
|
390
|
-
raise IOError()
|
|
391
|
-
|
|
392
|
-
if enforce == "file" and not resolved.is_file():
|
|
393
|
-
_LOGGER.error(f"Path was enforced as a file, but it is not: '{resolved}'")
|
|
394
|
-
raise TypeError()
|
|
395
|
-
|
|
396
|
-
if enforce == "directory" and not resolved.is_dir():
|
|
397
|
-
_LOGGER.error(f"Path was enforced as a directory, but it is not: '{resolved}'")
|
|
398
|
-
raise TypeError()
|
|
399
|
-
|
|
400
|
-
if verbose:
|
|
401
|
-
if resolved.is_file():
|
|
402
|
-
print("📄 Path is a File")
|
|
403
|
-
elif resolved.is_dir():
|
|
404
|
-
print("📁 Path is a Directory")
|
|
405
|
-
else:
|
|
406
|
-
print("❓ Path exists but is neither file nor directory")
|
|
407
|
-
|
|
408
|
-
return resolved
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
def sanitize_filename(filename: str) -> str:
|
|
412
|
-
"""
|
|
413
|
-
Sanitizes the name by:
|
|
414
|
-
- Stripping leading/trailing whitespace.
|
|
415
|
-
- Replacing all internal whitespace characters with underscores.
|
|
416
|
-
- Removing or replacing characters invalid in filenames.
|
|
417
|
-
|
|
418
|
-
Args:
|
|
419
|
-
filename (str): Base filename.
|
|
420
|
-
|
|
421
|
-
Returns:
|
|
422
|
-
str: A sanitized string suitable to use as a filename.
|
|
423
|
-
"""
|
|
424
|
-
# Strip leading/trailing whitespace
|
|
425
|
-
sanitized = filename.strip()
|
|
426
|
-
|
|
427
|
-
# Replace all whitespace sequences (space, tab, etc.) with underscores
|
|
428
|
-
sanitized = re.sub(r'\s+', '_', sanitized)
|
|
429
|
-
|
|
430
|
-
# Conservative filter to keep filenames safe across platforms
|
|
431
|
-
sanitized = re.sub(r'[^\w\-.]', '', sanitized)
|
|
432
|
-
|
|
433
|
-
# Check for empty string after sanitization
|
|
434
|
-
if not sanitized:
|
|
435
|
-
_LOGGER.error("The sanitized filename is empty. The original input may have contained only invalid characters.")
|
|
436
|
-
raise ValueError()
|
|
437
|
-
|
|
438
|
-
return sanitized
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
def list_csv_paths(directory: Union[str, Path], verbose: bool = True, raise_on_empty: bool = True) -> dict[str, Path]:
|
|
442
|
-
"""
|
|
443
|
-
Lists all `.csv` files in the specified directory and returns a mapping: filenames (without extensions) to their absolute paths.
|
|
444
|
-
|
|
445
|
-
Parameters:
|
|
446
|
-
directory (str | Path): Path to the directory containing `.csv` files.
|
|
447
|
-
verbose (bool): If True, prints found files.
|
|
448
|
-
raise_on_empty (bool): If True, raises IOError if no files are found.
|
|
449
|
-
|
|
450
|
-
Returns:
|
|
451
|
-
(dict[str, Path]): Dictionary mapping {filename: filepath}.
|
|
452
|
-
"""
|
|
453
|
-
# wraps the more general function
|
|
454
|
-
return list_files_by_extension(directory=directory, extension="csv", verbose=verbose, raise_on_empty=raise_on_empty)
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
def list_files_by_extension(
|
|
458
|
-
directory: Union[str, Path],
|
|
459
|
-
extension: str,
|
|
460
|
-
verbose: bool = True,
|
|
461
|
-
raise_on_empty: bool = True
|
|
462
|
-
) -> dict[str, Path]:
|
|
463
|
-
"""
|
|
464
|
-
Lists all files with the specified extension in the given directory and returns a mapping:
|
|
465
|
-
filenames (without extensions) to their absolute paths.
|
|
466
|
-
|
|
467
|
-
Parameters:
|
|
468
|
-
directory (str | Path): Path to the directory to search in.
|
|
469
|
-
extension (str): File extension to search for (e.g., 'json', 'txt').
|
|
470
|
-
verbose (bool): If True, logs the files found.
|
|
471
|
-
raise_on_empty (bool): If True, raises IOError if no matching files are found.
|
|
472
|
-
|
|
473
|
-
Returns:
|
|
474
|
-
(dict[str, Path]): Dictionary mapping {filename: filepath}. Returns empty dict if none found and raise_on_empty is False.
|
|
475
|
-
"""
|
|
476
|
-
dir_path = make_fullpath(directory, enforce="directory")
|
|
477
|
-
|
|
478
|
-
# Normalize the extension (remove leading dot if present)
|
|
479
|
-
normalized_ext = extension.lstrip(".").lower()
|
|
480
|
-
pattern = f"*.{normalized_ext}"
|
|
481
|
-
|
|
482
|
-
matched_paths = list(dir_path.glob(pattern))
|
|
483
|
-
|
|
484
|
-
if not matched_paths:
|
|
485
|
-
msg = f"No '.{normalized_ext}' files found in directory: {dir_path}."
|
|
486
|
-
if raise_on_empty:
|
|
487
|
-
_LOGGER.error(msg)
|
|
488
|
-
raise IOError()
|
|
489
|
-
else:
|
|
490
|
-
if verbose:
|
|
491
|
-
_LOGGER.warning(msg)
|
|
492
|
-
return {}
|
|
493
|
-
|
|
494
|
-
name_path_dict = {p.stem: p for p in matched_paths}
|
|
495
|
-
|
|
496
|
-
if verbose:
|
|
497
|
-
_LOGGER.info(f"📂 '{normalized_ext.upper()}' files found:")
|
|
498
|
-
for name in name_path_dict:
|
|
499
|
-
print(f"\t{name}")
|
|
500
|
-
|
|
501
|
-
return name_path_dict
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
def list_subdirectories(
    root_dir: Union[str, Path],
    verbose: bool = True,
    raise_on_empty: bool = True
) -> dict[str, Path]:
    """
    Scans a directory and returns a dictionary of its immediate subdirectories.

    Args:
        root_dir (str | Path): The path to the directory to scan.
        verbose (bool): If True, prints the number of directories found.
        raise_on_empty (bool): If True, raises IOError if no subdirectories are found.

    Returns:
        dict[str, Path]: A dictionary mapping subdirectory names (str) to their full Path objects.

    Raises:
        IOError: If no subdirectories are found and `raise_on_empty` is True.
    """
    root_path = make_fullpath(root_dir, enforce="directory")

    directories = [p.resolve() for p in root_path.iterdir() if p.is_dir()]

    if not directories:
        msg = f"No subdirectories found inside '{root_path}'"
        if raise_on_empty:
            _LOGGER.error(msg)
            # Attach the message so the exception itself is informative
            # for callers that catch it.
            raise IOError(msg)
        else:
            if verbose:
                _LOGGER.warning(msg)
            return {}

    if verbose:
        count = len(directories)
        # Use pluralization for better readability
        plural = 'ies' if count != 1 else 'y'
        print(f"Found {count} subdirector{plural} in '{root_path.name}'.")

    # Create a dictionary where the key is the directory's name (a string)
    # and the value is the full Path object.
    dir_map = {p.name: p for p in directories}

    return dir_map
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
def clean_directory(directory: Union[str, Path], verbose: bool = False) -> None:
    """
    ⚠️ DANGER: DESTRUCTIVE OPERATION ⚠️

    Empties the given directory: every non-hidden file and subdirectory
    directly inside it is removed, while the directory itself is kept.

    Safety: entries whose names start with a period '.' are never touched.
    This covers macOS/Linux hidden files and dot-config folders on Windows.

    Args:
        directory (str | Path): The directory path to clean.
        verbose (bool): If True, prints the name of each top-level item deleted.
    """
    target_dir = make_fullpath(directory, enforce="directory")

    if verbose:
        _LOGGER.warning(f"Starting cleanup of directory: {target_dir}")

    for entry in target_dir.iterdir():
        # Safety check: skip anything hidden.
        if entry.name.startswith("."):
            continue

        try:
            if entry.is_file() or entry.is_symlink():
                entry.unlink()
                if verbose:
                    print(f" 🗑️ Deleted file: {entry.name}")
            elif entry.is_dir():
                shutil.rmtree(entry)
                if verbose:
                    print(f" 🗑️ Deleted directory: {entry.name}")
        except Exception as e:
            # Best-effort cleanup: log the failure and move on to the
            # remaining entries instead of aborting.
            _LOGGER.warning(f"Failed to delete item '{entry.name}': {e}")
            continue
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
def safe_move(
    source: Union[str, Path],
    final_destination: Union[str, Path],
    rename: Optional[str] = None,
    overwrite: bool = False
) -> Path:
    """
    Moves a file or directory to a destination directory with safety checks.

    Features:
    - Supports optional renaming (sanitized automatically).
    - PRESERVES file extensions during renaming (cannot be modified).
    - Prevents accidental overwrites unless explicit.

    Args:
        source (str | Path): The file or directory to move.
        final_destination (str | Path): The destination DIRECTORY where the item will be moved. It will be created if it does not exist.
        rename (Optional[str]): If provided, the moved item will be renamed to this. Note: For files, the extension is strictly preserved.
        overwrite (bool): If True, overwrites the destination path if it exists.

    Returns:
        Path: The new absolute path of the moved item.
    """
    # Validate the source and make sure the destination directory exists.
    origin = make_fullpath(source, make=False)
    target_dir = make_fullpath(final_destination, make=True, enforce="directory")

    # Resolve the name the item will carry after the move. Files always
    # keep their original extension, even when renamed.
    if rename:
        clean_name = sanitize_filename(rename)
        target_name = f"{clean_name}{origin.suffix}" if origin.is_file() else clean_name
    else:
        target_name = origin.name

    destination = target_dir / target_name

    # Collision detection and (optional) smart overwrite handling.
    if destination.exists():
        if not overwrite:
            _LOGGER.error(f"Destination already exists: '{destination}'. Use overwrite=True to force.")
            raise FileExistsError()

        if destination.is_dir():
            # Never replace a directory with a file.
            if origin.is_file():
                _LOGGER.error(f"Cannot overwrite directory '{destination}' with file '{origin}'")
                raise IsADirectoryError()
            # Remove the existing directory first to avoid nesting/errors.
            shutil.rmtree(destination)
        else:
            # Destination is a file; never replace it with a directory.
            if origin.is_dir():
                _LOGGER.error(f"Cannot overwrite file '{destination}' with directory '{origin}'")
                raise FileExistsError()
            destination.unlink()

    # Perform the move itself.
    try:
        shutil.move(str(origin), str(destination))
        return destination
    except Exception as e:
        _LOGGER.exception(f"Failed to move '{origin}' to '{destination}'")
        raise e
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
def info():
    # Print this module's public API (the names in __all__) via the
    # shared _script_info helper.
    _script_info(__all__)
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
from .._core import _imprimir_disponibles
|
|
2
|
+
|
|
3
|
+
# Names this subpackage advertises; passed to the shared
# _imprimir_disponibles helper for display (see info() below in this module).
_GRUPOS = [
    "DragonPathManager",
    "make_fullpath",
    "sanitize_filename",
    "list_csv_paths",
    "list_files_by_extension",
    "list_subdirectories",
    "clean_directory",
    "safe_move",
]
|
|
13
|
+
|
|
14
|
+
def info():
    # Display the available names listed in _GRUPOS using the shared
    # core helper.
    _imprimir_disponibles(_GRUPOS)
|