PyPI - dragon-ml-toolbox - Versions diffs - 3.7.0__py3-none-any.whl → 3.9.0__py3-none-any.whl - Mend

dragon-ml-toolbox 3.7.0__py3-none-any.whl → 3.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of dragon-ml-toolbox might be problematic. Click here for more details.

Files changed (10) hide show

{dragon_ml_toolbox-3.7.0.dist-info → dragon_ml_toolbox-3.9.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 3.7.0
+Version: 3.9.0
 Summary: A collection of tools for data science and machine learning projects.
 Author-email: Karl Loza <luigiloza@gmail.com>
 License-Expression: MIT
@@ -15,8 +15,8 @@ License-File: LICENSE-THIRD-PARTY.md
 Requires-Dist: numpy<2.0
 Requires-Dist: scikit-learn
 Requires-Dist: openpyxl
-Requires-Dist: miceforest<7.0.0,>=6.0.0
-Requires-Dist: plotnine<0.13,>=0.12
+Requires-Dist: miceforest>=6.0.0
+Requires-Dist: plotnine>=0.12
 Requires-Dist: matplotlib
 Requires-Dist: seaborn
 Requires-Dist: pandas
@@ -129,6 +129,7 @@ ML_callbacks
 ML_evaluation
 ML_trainer
 ML_tutorial
+path_manager
 PSO_optimization
 RNN_forecast
 utilities

{dragon_ml_toolbox-3.7.0.dist-info → dragon_ml_toolbox-3.9.0.dist-info}/RECORD RENAMED Viewed

@@ -1,7 +1,7 @@
-dragon_ml_toolbox-3.7.0.dist-info/licenses/LICENSE,sha256=2uUFNy7D0TLgHim1K5s3DIJ4q_KvxEXVilnU20cWliY,1066
-dragon_ml_toolbox-3.7.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=6cfpIeQ6D4Mcs10nkogQrkVyq1T7i2qXjjNHFoUMOyE,1892
+dragon_ml_toolbox-3.9.0.dist-info/licenses/LICENSE,sha256=2uUFNy7D0TLgHim1K5s3DIJ4q_KvxEXVilnU20cWliY,1066
+dragon_ml_toolbox-3.9.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=6cfpIeQ6D4Mcs10nkogQrkVyq1T7i2qXjjNHFoUMOyE,1892
 ml_tools/ETL_engineering.py,sha256=yeZsW_7zRvEcuMZbM4E2GV1dxwBoWIeJAcFFk2AK0fY,39502
-ml_tools/GUI_tools.py,sha256=3kRxok-QCN5S0q1i7yK137Bsr6c2N4M4nIvgPVAuZU0,20371
+ml_tools/GUI_tools.py,sha256=ABR1cqV09iZ2DbLfLZB7jaQVRVDbvCmj09pNkr3TDZk,18800
 ml_tools/MICE_imputation.py,sha256=rYqvwQDVtoAJJ0agXWoGzoZEHedWiA6QzcEKEIkiZ08,11388
 ml_tools/ML_callbacks.py,sha256=OT2zwORLcn49megBEgXsSUxDHoW0Ft0_v7hLEVF3jHM,13063
 ml_tools/ML_evaluation.py,sha256=oiDV6HItQloUUKCUpltV-2pogubWLBieGpc-VUwosAQ,10106
@@ -15,11 +15,12 @@ ml_tools/_particle_swarm_optimization.py,sha256=b_eNNkA89Y40hj76KauivT8KLScH1B9w
 ml_tools/_pytorch_models.py,sha256=bpWZsrSwCvHJQkR6UfoPpElsMv9AvmiNErNHC8NYB_I,10132
 ml_tools/data_exploration.py,sha256=M7bn2q5XN9zJZJGAmMMFSFFZh8LGzC2arFelrXw3N6Q,25241
 ml_tools/datasetmaster.py,sha256=S3PKHNQZ9cyAOck8xQltVLZhaD1gFLfgHFL-aRjz4JU,30077
-ml_tools/ensemble_learning.py,sha256=CDSIygnHaNe92aJ46Fofevd7q6lowTnE98yWuIV3Y6w,37462
+ml_tools/ensemble_learning.py,sha256=p9PZwGY2OGSrJhXNzvMS_kCjK-I2JVcqiJBaVzb0GrM,42616
 ml_tools/handle_excel.py,sha256=lwds7rDLlGSCWiWGI7xNg-Z7kxAepogp0lstSFa0590,12949
 ml_tools/logger.py,sha256=UkbiU9ihBhw9VKyn3rZzisdClWV94EBV6B09_D0iUU0,6026
-ml_tools/utilities.py,sha256=0w0vka0Aj9IYOHJ6crWIb6gwpQIJnPyj3v2_dnVxHrs,23138
-dragon_ml_toolbox-3.7.0.dist-info/METADATA,sha256=kvgFjd_BRwob7xycC5rbROCkq4C6FVq3J5-VdCXEPrI,3273
-dragon_ml_toolbox-3.7.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-dragon_ml_toolbox-3.7.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
-dragon_ml_toolbox-3.7.0.dist-info/RECORD,,
+ml_tools/path_manager.py,sha256=OCpESgdftbi6mOxetDMIaHhazt4N-W8pJx11X3-yNOs,8305
+ml_tools/utilities.py,sha256=HR36Q_vYnaRcpSjpNISnA7lOZ36TouHop38lPLG_twY,23146
+dragon_ml_toolbox-3.9.0.dist-info/METADATA,sha256=2R3xIuefuR9O_h71q3S49xUm2MLKQtn12jjwNFKl2mE,3273
+dragon_ml_toolbox-3.9.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+dragon_ml_toolbox-3.9.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
+dragon_ml_toolbox-3.9.0.dist-info/RECORD,,

ml_tools/GUI_tools.py CHANGED Viewed

@@ -4,83 +4,21 @@ from typing import Optional, Callable, Any
 import traceback
 import FreeSimpleGUI as sg
 from functools import wraps
-from typing import Any, Dict, Tuple, List
+from typing import Any, Dict, Tuple, List, Literal
 from .utilities import _script_info
 import numpy as np
 from .logger import _LOGGER
+from abc import ABC, abstractmethod
 __all__ = [
-    "PathManager",
     "ConfigManager",
     "GUIFactory",
     "catch_exceptions",
-    "prepare_feature_vector",
+    "BaseFeatureHandler",
     "update_target_fields"
 ]
-# --- Path Management ---
-class PathManager:
-    """
-    Manages paths for a Python application, supporting both development mode and bundled mode via Briefcase.
-    """
-    def __init__(self, anchor_file: str):
-        """
-        Initializes the PathManager. The package name is automatically inferred
-        from the parent directory of the anchor file.
-        Args:
-            anchor_file (str): The absolute path to a file within the project's
-                               package, typically `__file__` from a module inside
-                               that package (paths.py).
-        Note:
-            This inference assumes that the anchor file's parent directory
-            has the same name as the package (e.g., `.../src/my_app/paths.py`).
-            This is a standard and recommended project structure.
-        """
-        resolved_anchor_path = Path(anchor_file).resolve()
-        self.package_name = resolved_anchor_path.parent.name
-        self._is_bundled, self._resource_path_func = self._check_bundle_status()
-        if self._is_bundled:
-            # In a Briefcase bundle, resource_path gives an absolute path
-            # to the resource directory.
-            self.package_root = self._resource_path_func(self.package_name, "") # type: ignore
-        else:
-            # In development mode, the package root is the directory
-            # containing the anchor file.
-            self.package_root = resolved_anchor_path.parent
-    def _check_bundle_status(self) -> tuple[bool, Optional[Callable]]:
-        """Checks if the app is running in a bundled environment."""
-        try:
-            # This is the function Briefcase provides in a bundled app
-            from briefcase.platforms.base import resource_path # type: ignore
-            return True, resource_path
-        except ImportError:
-            return False, None
-    def get_path(self, relative_path: str | Path) -> Path:
-        """
-        Gets the absolute path for a given resource file or directory
-        relative to the package root.
-        Args:
-            relative_path (str | Path): The path relative to the package root (e.g., 'helpers/icon.png').
-        Returns:
-            Path: The absolute path to the resource.
-        """
-        if self._is_bundled:
-            # Briefcase's resource_path handles resolving the path within the app bundle
-            return self._resource_path_func(self.package_name, str(relative_path)) # type: ignore
-        else:
-            # In dev mode, join package root with the relative path.
-            return self.package_root / relative_path
 # --- Configuration Management ---
 class _SectionProxy:
     """A helper class to represent a section of the .ini file as an object."""
@@ -273,8 +211,8 @@ class GUIFactory:
         self,
         data_dict: Dict[str, Tuple[float, float]],
         is_target: bool = False,
-        layout_mode: str = 'grid',
-        columns_per_row: int = 4
+        layout_mode: Literal["grid", "row"] = 'grid',
+        features_per_column: int = 4
     ) -> List[List[sg.Column]]:
         """
         Generates a layout for continuous features or targets.
@@ -283,7 +221,7 @@ class GUIFactory:
             data_dict (dict): Keys are feature names, values are (min, max) tuples.
             is_target (bool): If True, creates disabled inputs for displaying results.
             layout_mode (str): 'grid' for a multi-row grid layout, or 'row' for a single horizontal row.
-            columns_per_row (int): Number of feature columns per row when layout_mode is 'grid'.
+            features_per_column (int): Number of features per column when `layout_mode` is 'grid'.
         Returns:
             A list of lists of sg.Column elements, ready to be used in a window layout.
@@ -294,7 +232,7 @@ class GUIFactory:
         columns = []
         for name, (val_min, val_max) in data_dict.items():
-            key = f"TARGET_{name}" if is_target else name
+            key = name
             default_text = "" if is_target else str(val_max)
             label = sg.Text(name, font=label_font, background_color=bg_color, key=f"_text_{name}")
@@ -313,6 +251,7 @@ class GUIFactory:
                 range_text = sg.Text(f"Range: {int(val_min)}-{int(val_max)}", font=range_font, background_color=bg_color)
                 layout = [[label], [element], [range_text]]
+            # each feature is wrapped as a column element
             layout.append([sg.Text(" ", font=(cfg.fonts.font_family, 2), background_color=bg_color)]) # type: ignore
             columns.append(sg.Column(layout, background_color=bg_color))
@@ -320,13 +259,13 @@ class GUIFactory:
             return [columns] # A single row containing all columns
         # Default to 'grid' layout
-        return [columns[i:i + columns_per_row] for i in range(0, len(columns), columns_per_row)]
+        return [columns[i:i + features_per_column] for i in range(0, len(columns), features_per_column)]
     def generate_combo_layout(
         self,
         data_dict: Dict[str, List[Any]],
-        layout_mode: str = 'grid',
-        columns_per_row: int = 4
+        layout_mode: Literal["grid", "row"] = 'grid',
+        features_per_column: int = 4
     ) -> List[List[sg.Column]]:
         """
         Generates a layout for categorical or binary features using Combo boxes.
@@ -334,7 +273,7 @@ class GUIFactory:
         Args:
             data_dict (dict): Keys are feature names, values are lists of options.
             layout_mode (str): 'grid' for a multi-row grid layout, or 'row' for a single horizontal row.
-            columns_per_row (int): Number of feature columns per row when layout_mode is 'grid'.
+            features_per_column (int): Number of features per column when `layout_mode` is 'grid'.
         Returns:
             A list of lists of sg.Column elements, ready to be used in a window layout.
@@ -352,13 +291,14 @@ class GUIFactory:
             )
             layout = [[label], [element]]
             layout.append([sg.Text(" ", font=(cfg.fonts.font_family, 2), background_color=bg_color)]) # type: ignore
+            # each feature is wrapped in a Column element
             columns.append(sg.Column(layout, background_color=bg_color))
         if layout_mode == 'row':
             return [columns] # A single row containing all columns
         # Default to 'grid' layout
-        return [columns[i:i + columns_per_row] for i in range(0, len(columns), columns_per_row)]
+        return [columns[i:i + features_per_column] for i in range(0, len(columns), features_per_column)]
     # --- Window Creation ---
     def create_window(self, title: str, layout: List[List[sg.Element]], **kwargs) -> sg.Window:
@@ -412,68 +352,93 @@ def catch_exceptions(show_popup: bool = True):
     return decorator
-# --- Inference Helpers ---
-def _default_categorical_processor(feature_name: str, chosen_value: Any) -> List[float]:
-    """
-    Default processor for binary 'True'/'False' strings.
-    Returns a list containing a single float.
-    """
-    return [1.0] if str(chosen_value) == 'True' else [0.0]
-def prepare_feature_vector(
-    values: Dict[str, Any],
-    feature_order: List[str],
-    continuous_features: List[str],
-    categorical_features: List[str],
-    categorical_processor: Optional[Callable[[str, Any], List[float]]] = None
-) -> np.ndarray:
+# --- Inference Helper ---
+class BaseFeatureHandler(ABC):
     """
-    Validates and converts GUI values into a numpy array for a model.
-    This function supports label encoding and one-hot encoding via the processor.
+    An abstract base class that defines the template for preparing a model input feature vector to perform inference, from GUI inputs.
-    Args:
-        values (dict): The values dictionary from a `window.read()` call.
-        feature_order (list): A list of all feature names that have a GUI element.
-                              For one-hot encoding, this should be the name of the
-                              single GUI element (e.g., 'material_type'), not the
-                              expanded feature names (e.g., 'material_is_steel').
-        continuous_features (list): A list of names for continuous features.
-        categorical_features (list): A list of names for categorical features.
-        categorical_processor (callable, optional): A function to process categorical
-            values. It should accept (feature_name, chosen_value) and return a
-            list of floats (e.g., [1.0] for label encoding, [0.0, 1.0, 0.0] for one-hot).
-            If None, a default 'True'/'False' processor is used.
-    Returns:
-        A 1D numpy array ready for model inference.
+    A subclass must implement the `gui_input_map` property and the `process_categorical` method.
     """
-    processed_values: List[float] = []
-    # Use the provided processor or the default one
-    processor = categorical_processor or _default_categorical_processor
-    # Create sets for faster lookups
-    cont_set = set(continuous_features)
-    cat_set = set(categorical_features)
-    for name in feature_order:
-        chosen_value = values.get(name)
+    def __init__(self, expected_columns_in_order: list[str]):
+        """
+        Validates and stores the feature names in the order the model expects.
-        if chosen_value is None or chosen_value == '':
-            raise ValueError(f"Feature '{name}' is missing a value.")
+        Args:
+            expected_columns_in_order (List[str]): A list of strings with the feature names in the correct order.
+        """
+        # --- Validation Logic ---
+        if not isinstance(expected_columns_in_order, list):
+            raise TypeError("Input 'expected_columns_in_order' must be a list.")
+        if not all(isinstance(col, str) for col in expected_columns_in_order):
+            raise TypeError("All elements in the 'expected_columns_in_order' list must be strings.")
+        # -----------------------
+        self._model_feature_order = expected_columns_in_order
+    @property
+    @abstractmethod
+    def gui_input_map(self) -> Dict[str, Literal["continuous","categorical"]]:
+        """
+        Must be implemented by the subclass.
-        if name in cont_set:
-            try:
-                processed_values.append(float(chosen_value))
-            except (ValueError, TypeError):
-                raise ValueError(f"Invalid input for '{name}'. Please enter a valid number.")
+        Should return a dictionary mapping each GUI input name to its type ('continuous' or 'categorical').
+        ```python
+        #Example:
+        {'temperature': 'continuous', 'material_type': 'categorical'}
+        ```
+        """
+        pass
+    @abstractmethod
+    def process_categorical(self, feature_name: str, chosen_value: Any) -> Dict[str, float]:
+        """
+        Must be implemented by the subclass.
+        Should take a GUI categorical feature name and its chosen value, and return a dictionary mapping the one-hot-encoded feature names to their
+        float values (as expected by the inference model).
+        """
+        pass
+    def __call__(self, window_values: Dict[str, Any]) -> np.ndarray:
+        """
+        Performs the full vector preparation, returning a 1D numpy array.
+        Should not be overridden by subclasses.
+        """
+        # Stage 1: Process GUI inputs into a dictionary
+        processed_features: Dict[str, float] = {}
+        for gui_name, feature_type in self.gui_input_map.items():
+            chosen_value = window_values.get(gui_name)
+            if chosen_value is None or str(chosen_value) == '':
+                raise ValueError(f"GUI input '{gui_name}' is missing a value.")
+            if feature_type == 'continuous':
+                try:
+                    processed_features[gui_name] = float(chosen_value)
+                except (ValueError, TypeError):
+                    raise ValueError(f"Invalid number '{chosen_value}' for '{gui_name}'.")
+            elif feature_type == 'categorical':
+                feature_dict = self.process_categorical(gui_name, chosen_value)
+                processed_features.update(feature_dict)
+        # Stage 2: Assemble the final vector using the model's required order
+        final_vector: List[float] = []
-        elif name in cat_set:
-            # The processor returns a list of values (one for label, multiple for one-hot)
-            numeric_values = processor(name, chosen_value)
-            processed_values.extend(numeric_values)
+        try:
+            for feature_name in self._model_feature_order:
+                final_vector.append(processed_features[feature_name])
+        except KeyError as e:
+            raise RuntimeError(
+                f"Configuration Error: Implemented methods failed to generate "
+                f"the required model feature: '{e}'"
+                f"Check the gui_input_map and process_categorical logic."
+            )
-    return np.array(processed_values, dtype=np.float32)
+        return np.array(final_vector, dtype=np.float32)
 def update_target_fields(window: sg.Window, results_dict: Dict[str, Any]):
@@ -482,12 +447,12 @@ def update_target_fields(window: sg.Window, results_dict: Dict[str, Any]):
     Args:
         window (sg.Window): The application's window object.
-        results_dict (dict): A dictionary where keys are target key names (including 'TARGET_' prefix if necessary) and values are the predicted results.
+        results_dict (dict): A dictionary where keys are target element-keys and values are the predicted results to update.
     """
     for target_name, result in results_dict.items():
         # Format numbers to 2 decimal places, leave other types as-is
         display_value = f"{result:.2f}" if isinstance(result, (int, float)) else result
-        window[target_name].update(display_value)
+        window[target_name].update(display_value) # type: ignore
 def info():

ml_tools/ensemble_learning.py CHANGED Viewed

@@ -6,7 +6,7 @@ from matplotlib.colors import Colormap
 from matplotlib import rcdefaults
 from pathlib import Path
-from typing import Literal, Union, Optional, Iterator, Tuple
+from typing import Literal, Union, Optional, Iterator, Tuple, Dict, Any, List
 from imblearn.over_sampling import ADASYN, SMOTE, RandomOverSampler
 from imblearn.under_sampling import RandomUnderSampler
@@ -19,7 +19,7 @@ from sklearn.model_selection import train_test_split
 from sklearn.metrics import accuracy_score, classification_report, ConfusionMatrixDisplay, mean_absolute_error, mean_squared_error, r2_score, roc_curve, roc_auc_score
 import shap
-from .utilities import yield_dataframes_from_dir, sanitize_filename, _script_info, serialize_object, make_fullpath
+from .utilities import yield_dataframes_from_dir, sanitize_filename, _script_info, serialize_object, make_fullpath, list_files_by_extension, deserialize_object
 from .logger import _LOGGER
 import warnings # Ignore warnings
@@ -38,7 +38,8 @@ __all__ = [
     "evaluate_model_regression",
     "get_shap_values",
     "train_test_pipeline",
-    "run_ensemble_pipeline"
+    "run_ensemble_pipeline",
+    "InferenceHandler"
 ]
 ## Type aliases
@@ -937,5 +938,124 @@ def run_ensemble_pipeline(datasets_dir: Union[str,Path], save_dir: Union[str,Pat
     _LOGGER.info("✅ Training and evaluation complete.")
+###### 6. Inference ######
+class InferenceHandler:
+    """
+    Handles loading ensemble models and performing inference for either regression or classification tasks.
+    """
+    def __init__(self,
+                 models_dir: Union[str,Path],
+                 task: TaskType,
+                 verbose: bool=True) -> None:
+        """
+        Initializes the handler by loading all models from a directory.
+        Args:
+            models_dir (Path): The directory containing the saved .joblib model files.
+            task ("regression" | "classification"): The type of task the models perform.
+        """
+        self.models: Dict[str, Any] = dict()
+        self.task: str = task
+        self.verbose = verbose
+        self._feature_names: Optional[List[str]] = None
+        model_files = list_files_by_extension(directory=models_dir, extension="joblib")
+        for fname, fpath in model_files.items():
+            try:
+                full_object: dict
+                full_object = deserialize_object(filepath=fpath,
+                                                 verbose=self.verbose,
+                                                 raise_on_error=True) # type: ignore
+                model: Any = full_object["model"]
+                target_name: str = full_object["target_name"]
+                feature_names_list: List[str] = full_object["feature_names"]
+                # Check that feature names match
+                if self._feature_names is None:
+                    # Store the feature names from the first model loaded.
+                    self._feature_names = feature_names_list
+                elif self._feature_names != feature_names_list:
+                    # Add a warning if subsequent models have different feature names.
+                    _LOGGER.warning(f"⚠️ Mismatched feature names in {fname}. Using feature order from the first model loaded.")
+                self.models[target_name] = model
+                if self.verbose:
+                    _LOGGER.info(f"✅ Loaded model for target: {target_name}")
+            except Exception as e:
+                _LOGGER.warning(f"⚠️ Failed to load or parse {fname}: {e}")
+    @property
+    def feature_names(self) -> List[str]:
+        """
+        Getter for the list of feature names the models expect.
+        Returns an empty list if no models were loaded.
+        """
+        return self._feature_names if self._feature_names is not None else []
+    def predict(self, features: np.ndarray) -> Dict[str, Any]:
+        """
+        Predicts on a single feature vector.
+        Args:
+            features (np.ndarray): A 1D or 2D NumPy array for a single sample.
+        Returns:
+            Dict[str, Any]: A dictionary where keys are target names.
+                - For regression: The value is the single predicted float.
+                - For classification: The value is another dictionary {'label': ..., 'probabilities': ...}.
+        """
+        if features.ndim == 1:
+            features = features.reshape(1, -1)
+        if features.shape[0] != 1:
+            raise ValueError("The predict() method is for a single sample. Use predict_batch() for multiple samples.")
+        results: Dict[str, Any] = dict()
+        for target_name, model in self.models.items():
+            if self.task == "regression":
+                prediction = model.predict(features)
+                results[target_name] = prediction.item()
+            else: # Classification
+                label = model.predict(features)[0]
+                probabilities = model.predict_proba(features)[0]
+                results[target_name] = {"label": label, "probabilities": probabilities}
+        if self.verbose:
+            _LOGGER.info("✅ Inference process complete.")
+        return results
+    def predict_batch(self, features: np.ndarray) -> Dict[str, Any]:
+        """
+        Predicts on a batch of feature vectors.
+        Args:
+            features (np.ndarray): A 2D NumPy array where each row is a sample.
+        Returns:
+            Dict[str, Any]: A dictionary where keys are target names.
+                - For regression: The value is a NumPy array of predictions.
+                - For classification: The value is another dictionary {'labels': ..., 'probabilities': ...}.
+        """
+        if features.ndim != 2:
+            raise ValueError("Input for batch prediction must be a 2D array.")
+        results: Dict[str, Any] = dict()
+        for target_name, model in self.models.items():
+            if self.task == "regression":
+                results[target_name] = model.predict(features)
+            else: # Classification
+                labels = model.predict(features)
+                probabilities = model.predict_proba(features)
+                results[target_name] = {"labels": labels, "probabilities": probabilities}
+        if self.verbose:
+            _LOGGER.info("✅ Inference process complete.")
+        return results
 def info():
     _script_info(__all__)

ml_tools/path_manager.py ADDED Viewed

@@ -0,0 +1,212 @@
+from pprint import pprint
+from typing import Optional, List, Dict, Callable, Union
+from pathlib import Path
+from .utilities import _script_info
+from .logger import _LOGGER
+__all__ = [
+    "PathManager"
+]
+class PathManager:
+    """
+    Manages and stores a project's file paths, acting as a centralized
+    "path database". It supports both development mode and applications
+    bundled with Briefcase.
+    Supports python dictionary syntax.
+    """
+    def __init__(
+        self,
+        anchor_file: str,
+        base_directories: Optional[List[str]] = None
+    ):
+        """
+        The initializer determines the project's root directory and can pre-register
+        a list of base directories relative to that root.
+        Args:
+            anchor_file (str): The absolute path to a file whose parent directory will be considered the package root and name. Typically, `__file__`.
+            base_directories (Optional[List[str]]): A list of directory names located at the same level as the anchor file to be registered immediately.
+        """
+        resolved_anchor_path = Path(anchor_file).resolve()
+        self._package_name = resolved_anchor_path.parent.name
+        self._is_bundled, self._resource_path_func = self._check_bundle_status()
+        self._paths: Dict[str, Path] = {}
+        if self._is_bundled:
+            # In a bundle, resource_path gives the absolute path to the 'app_packages' dir
+            # when given the package name.
+            package_root = self._resource_path_func(self._package_name) # type: ignore
+        else:
+            # In dev mode, the package root is the directory containing the anchor file.
+            package_root = resolved_anchor_path.parent
+        # Register the root of the package itself
+        self._paths["ROOT"] = package_root
+        # Register all the base directories
+        if base_directories:
+            for dir_name in base_directories:
+                # In dev mode, this is simple. In a bundle, we must resolve
+                # each path from the package root.
+                if self._is_bundled:
+                     self._paths[dir_name] = self._resource_path_func(self._package_name, dir_name) # type: ignore
+                else:
+                     self._paths[dir_name] = package_root / dir_name
+    # A helper function to find the briefcase-injected resource function
+    def _check_bundle_status(self) -> tuple[bool, Optional[Callable]]:
+        """Checks if the app is running in a Briefcase bundle."""
+        try:
+            # This function is injected by Briefcase into the global scope
+            from briefcase.platforms.base import resource_path # type: ignore
+            return True, resource_path
+        except (ImportError, NameError):
+            return False, None
+    def get(self, key: str) -> Path:
+        """
+        Retrieves a stored path by its key.
+        Args:
+            key (str): The key of the path to retrieve.
+        Returns:
+            Path: The resolved, absolute Path object.
+        Raises:
+            KeyError: If the key is not found in the manager.
+        """
+        try:
+            return self._paths[key]
+        except KeyError:
+            _LOGGER.error(f"❌ Path key '{key}' not found.")
+            raise
+    def update(self, new_paths: Dict[str, Union[str, Path]], overwrite: bool = False) -> None:
+        """
+        Adds new paths or overwrites existing ones in the manager.
+        Args:
+            new_paths (Dict[str, Union[str, Path]]): A dictionary where keys are
+                                    the identifiers and values are the
+                                    Path objects or strings to store.
+            overwrite (bool): If False (default), raises a KeyError if any
+                            key in new_paths already exists. If True,
+                            allows overwriting existing keys.
+        """
+        if not overwrite:
+            for key in new_paths:
+                if key in self._paths:
+                    raise KeyError(
+                        f"Path key '{key}' already exists in the manager. To replace it, call update() with overwrite=True."
+                    )
+        # Resolve any string paths to Path objects before storing
+        resolved_new_paths = {k: Path(v) for k, v in new_paths.items()}
+        self._paths.update(resolved_new_paths)
+    def make_dirs(self, keys: Optional[List[str]] = None, verbose: bool = False) -> None:
+        """
+        Creates directory structures for registered paths in writable locations.
+        This method identifies paths that are directories (no file suffix) and creates them on the filesystem.
+        In a bundled application, this method will NOT attempt to create directories inside the read-only app package, preventing crashes. It
+        will only operate on paths outside of the package (e.g., user data dirs).
+        Args:
+            keys (Optional[List[str]]): If provided, only the directories
+                                        corresponding to these keys will be
+                                        created. If None (default), all
+                                        registered directory paths are used.
+            verbose (bool): If True, prints a message for each action.
+        """
+        path_items = []
+        if keys:
+            for key in keys:
+                if key in self._paths:
+                    path_items.append((key, self._paths[key]))
+                elif verbose:
+                    _LOGGER.warning(f"⚠️ Key '{key}' not found in PathManager, skipping.")
+        else:
+            path_items = self._paths.items()
+        # Get the package root to check against.
+        package_root = self._paths.get("ROOT")
+        for key, path in path_items:
+            if path.suffix:  # It's a file, not a directory
+                continue
+            # --- THE CRITICAL CHECK ---
+            # Determine if the path is inside the main application package.
+            is_internal_path = package_root and path.is_relative_to(package_root)
+            if self._is_bundled and is_internal_path:
+                if verbose:
+                    _LOGGER.warning(f"⚠️ Skipping internal directory '{key}' in bundled app (read-only).")
+                continue
+            # -------------------------
+            if verbose:
+                _LOGGER.info(f"📁 Ensuring directory exists for key '{key}': {path}")
+            path.mkdir(parents=True, exist_ok=True)
+    def status(self) -> None:
+        """
+        Checks the status of all registered paths on the filesystem and prints a formatted report.
+        """
+        report = {}
+        for key, path in self.items():
+            if path.is_dir():
+                report[key] = "📁 Directory"
+            elif path.is_file():
+                report[key] = "📄 File"
+            else:
+                report[key] = "❌ Not Found"
+        print("\n--- Path Status Report ---")
+        pprint(report)
+    def __repr__(self) -> str:
+        """Provides a string representation of the stored paths."""
+        path_list = "\n".join(f"  '{k}': '{v}'" for k, v in self._paths.items())
+        return f"PathManager(\n{path_list}\n)"
+    # --- Dictionary-Style Methods ---
+    def __getitem__(self, key: str) -> Path:
+        """Allows dictionary-style getting, e.g., PM['my_key']"""
+        return self.get(key)
+    def __setitem__(self, key: str, value: Union[str, Path]):
+        """Allows dictionary-style setting, does not allow overwriting, e.g., PM['my_key'] = path"""
+        self.update({key: value}, overwrite=False)
+    def __contains__(self, key: str) -> bool:
+        """Allows checking for a key's existence, e.g., if 'my_key' in PM"""
+        return key in self._paths
+    def __len__(self) -> int:
+        """Allows getting the number of paths, e.g., len(PM)"""
+        return len(self._paths)
+    def keys(self):
+        """Returns all registered path keys."""
+        return self._paths.keys()
+    def values(self):
+        """Returns all registered Path objects."""
+        return self._paths.values()
+    def items(self):
+        """Returns all registered (key, Path) pairs."""
+        return self._paths.items()
+def info():
+    _script_info(__all__)

ml_tools/utilities.py CHANGED Viewed

@@ -25,7 +25,7 @@ __all__ = [
     "serialize_object",
     "deserialize_object",
     "distribute_datasets_by_target",
-    "train_dataset_orchestrator"
+    "train_dataset_orchestrator",
 ]
@@ -645,7 +645,7 @@ def train_dataset_orchestrator(list_of_dirs: list[Union[str,Path]],
 class LogKeys:
     """
-    Used for ML scripts only
+    Used internally for ML scripts.
     Centralized keys for logging and history.
     """

{dragon_ml_toolbox-3.7.0.dist-info → dragon_ml_toolbox-3.9.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{dragon_ml_toolbox-3.7.0.dist-info → dragon_ml_toolbox-3.9.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{dragon_ml_toolbox-3.7.0.dist-info → dragon_ml_toolbox-3.9.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md RENAMED Viewed

File without changes

{dragon_ml_toolbox-3.7.0.dist-info → dragon_ml_toolbox-3.9.0.dist-info}/top_level.txt RENAMED Viewed

File without changes

dragon-ml-toolbox 3.7.0__py3-none-any.whl → 3.9.0__py3-none-any.whl

Potentially problematic release.

dragon-ml-toolbox 3.7.0__py3-none-any.whl → 3.9.0__py3-none-any.whl