PyPI - dragon-ml-toolbox - Versions diffs - 14.3.1__py3-none-any.whl → 16.0.0__py3-none-any.whl - Mend

dragon-ml-toolbox 14.3.1__py3-none-any.whl → 16.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of dragon-ml-toolbox might be problematic. Click here for more details.

Files changed (44)

{dragon_ml_toolbox-14.3.1.dist-info → dragon_ml_toolbox-16.0.0.dist-info}/METADATA +10 -5
dragon_ml_toolbox-16.0.0.dist-info/RECORD +51 -0
ml_tools/ETL_cleaning.py +20 -20
ml_tools/ETL_engineering.py +23 -25
ml_tools/GUI_tools.py +20 -20
ml_tools/MICE_imputation.py +3 -3
ml_tools/ML_callbacks.py +43 -26
ml_tools/ML_configuration.py +309 -0
ml_tools/ML_datasetmaster.py +220 -260
ml_tools/ML_evaluation.py +317 -81
ml_tools/ML_evaluation_multi.py +127 -36
ml_tools/ML_inference.py +249 -207
ml_tools/ML_models.py +13 -102
ml_tools/ML_models_advanced.py +1 -1
ml_tools/ML_optimization.py +12 -12
ml_tools/ML_scaler.py +11 -11
ml_tools/ML_sequence_datasetmaster.py +341 -0
ml_tools/ML_sequence_evaluation.py +215 -0
ml_tools/ML_sequence_inference.py +391 -0
ml_tools/ML_sequence_models.py +139 -0
ml_tools/ML_trainer.py +1247 -338
ml_tools/ML_utilities.py +51 -2
ml_tools/ML_vision_datasetmaster.py +262 -118
ml_tools/ML_vision_evaluation.py +26 -6
ml_tools/ML_vision_inference.py +117 -140
ml_tools/ML_vision_models.py +15 -1
ml_tools/ML_vision_transformers.py +233 -7
ml_tools/PSO_optimization.py +6 -6
ml_tools/SQL.py +4 -4
ml_tools/{keys.py → _keys.py} +45 -1
ml_tools/_schema.py +1 -1
ml_tools/ensemble_evaluation.py +54 -11
ml_tools/ensemble_inference.py +7 -33
ml_tools/ensemble_learning.py +1 -1
ml_tools/optimization_tools.py +2 -2
ml_tools/path_manager.py +5 -5
ml_tools/utilities.py +1 -2
dragon_ml_toolbox-14.3.1.dist-info/RECORD +0 -48
ml_tools/RNN_forecast.py +0 -56
ml_tools/_ML_vision_recipe.py +0 -88
{dragon_ml_toolbox-14.3.1.dist-info → dragon_ml_toolbox-16.0.0.dist-info}/WHEEL +0 -0
{dragon_ml_toolbox-14.3.1.dist-info → dragon_ml_toolbox-16.0.0.dist-info}/licenses/LICENSE +0 -0
{dragon_ml_toolbox-14.3.1.dist-info → dragon_ml_toolbox-16.0.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +0 -0
{dragon_ml_toolbox-14.3.1.dist-info → dragon_ml_toolbox-16.0.0.dist-info}/top_level.txt +0 -0

ml_tools/ML_vision_transformers.py CHANGED Viewed

@@ -1,14 +1,19 @@
-from typing import Union, Dict, Type, Callable
+from typing import Union, Dict, Type, Callable, Optional, Any, List, Literal
 from PIL import ImageOps, Image
+from torchvision import transforms
+from pathlib import Path
+import json
 from ._logger import _LOGGER
 from ._script_info import _script_info
-from .keys import VisionTransformRecipeKeys
+from ._keys import VisionTransformRecipeKeys
+from .path_manager import make_fullpath
 __all__ = [
     "TRANSFORM_REGISTRY",
-    "ResizeAspectFill"
+    "ResizeAspectFill",
+    "create_offline_augmentations"
 ]
 # --- Custom Vision Transform Class ---
@@ -23,9 +28,8 @@ class ResizeAspectFill:
     """
     def __init__(self, pad_color: Union[str, int] = "black") -> None:
         self.pad_color = pad_color
-        # Store kwargs to allow for recreation
+        # Store kwargs to allow for re-creation
         self.__setattr__(VisionTransformRecipeKeys.KWARGS, {"pad_color": pad_color})
-        # self._kwargs = {"pad_color": pad_color}
     def __call__(self, image: Image.Image) -> Image.Image:
         if not isinstance(image, Image.Image):
@@ -47,12 +51,234 @@ class ResizeAspectFill:
             padding = (left_padding, 0, right_padding, 0)
         return ImageOps.expand(image, padding, fill=self.pad_color)
-#NOTE: Add custom transforms here.
+#############################################################
+#NOTE: Add custom transforms.
 TRANSFORM_REGISTRY: Dict[str, Type[Callable]] = {
     "ResizeAspectFill": ResizeAspectFill,
 }
+#############################################################
+def create_offline_augmentations(
+    input_directory: Union[str, Path],
+    output_directory: Union[str, Path],
+    results_per_image: int,
+    recipe: Optional[Dict[str, Any]] = None,
+    save_format: Literal["WEBP", "JPEG", "PNG", "BMP", "TIF"] = "WEBP",
+    save_quality: int = 80
+) -> None:
+    """
+    Reads all valid images from an input directory, applies augmentations,
+    and saves the new images to an output directory (offline augmentation).
+    Skips subdirectories in the input path.
+    Args:
+        input_directory (Union[str, Path]): Path to the directory of source images.
+        output_directory (Union[str, Path]): Path to save the augmented images.
+        results_per_image (int): The number of augmented versions to create
+                                 for each source image.
+        recipe (Optional[Dict[str, Any]]): A transform recipe dictionary. If None,
+                                           a default set of strong, random
+                                           augmentations will be used.
+        save_format (str): The format to save images (e.g., "WEBP", "JPEG", "PNG").
+                           Defaults to "WEBP" for good compression.
+        save_quality (int): The quality for lossy formats (1-100). Defaults to 80.
+    """
+    VALID_IMG_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tif', '.tiff')
+    # --- 1. Validate Paths ---
+    in_path = make_fullpath(input_directory, enforce="directory")
+    out_path = make_fullpath(output_directory, make=True, enforce="directory")
+    _LOGGER.info(f"Starting offline augmentation:\n\tInput: {in_path}\n\tOutput: {out_path}")
+    # --- 2. Find Images ---
+    image_files = [
+        f for f in in_path.iterdir()
+        if f.is_file() and f.suffix.lower() in VALID_IMG_EXTENSIONS
+    ]
+    if not image_files:
+        _LOGGER.warning(f"No valid image files found in {in_path}.")
+        return
+    _LOGGER.info(f"Found {len(image_files)} images to process.")
+    # --- 3. Define Transform Pipeline ---
+    transform_pipeline: transforms.Compose
+    if recipe:
+        _LOGGER.info("Building transformations from provided recipe.")
+        try:
+            transform_pipeline = _build_transform_from_recipe(recipe)
+        except Exception as e:
+            _LOGGER.error(f"Failed to build transform from recipe: {e}")
+            return
+    else:
+        _LOGGER.info("No recipe provided. Using default random augmentation pipeline.")
+        # Default "random" pipeline
+        transform_pipeline = transforms.Compose([
+            transforms.RandomResizedCrop(256, scale=(0.4, 1.0)),
+            transforms.RandomHorizontalFlip(p=0.5),
+            transforms.RandomRotation(degrees=90),
+            transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.15),
+            transforms.RandomPerspective(distortion_scale=0.2, p=0.4),
+            transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
+            transforms.RandomApply([
+                transforms.GaussianBlur(kernel_size=3)
+            ], p=0.3)
+        ])
+    # --- 4. Process Images ---
+    total_saved = 0
+    format_upper = save_format.upper()
+    for img_path in image_files:
+        _LOGGER.debug(f"Processing {img_path.name}...")
+        try:
+            original_image = Image.open(img_path).convert("RGB")
+            for i in range(results_per_image):
+                new_stem = f"{img_path.stem}_aug_{i+1:03d}"
+                output_path = out_path / f"{new_stem}.{format_upper.lower()}"
+                # Apply transform
+                transformed_image = transform_pipeline(original_image)
+                # Save
+                transformed_image.save(
+                    output_path,
+                    format=format_upper,
+                    quality=save_quality,
+                    optimize=True # Add optimize flag
+                )
+                total_saved += 1
+        except Exception as e:
+            _LOGGER.warning(f"Failed to process or save augmentations for {img_path.name}: {e}")
+    _LOGGER.info(f"Offline augmentation complete. Saved {total_saved} new images.")
+def _build_transform_from_recipe(recipe: Dict[str, Any]) -> transforms.Compose:
+    """Internal helper to build a transform pipeline from a recipe dict."""
+    pipeline_steps: List[Callable] = []
+    if VisionTransformRecipeKeys.PIPELINE not in recipe:
+        _LOGGER.error("Recipe dict is invalid: missing 'pipeline' key.")
+        raise ValueError("Invalid recipe format.")
+    for step in recipe[VisionTransformRecipeKeys.PIPELINE]:
+        t_name = step.get(VisionTransformRecipeKeys.NAME)
+        t_kwargs = step.get(VisionTransformRecipeKeys.KWARGS, {})
+        if not t_name:
+            _LOGGER.error(f"Invalid transform step, missing 'name': {step}")
+            continue
+        transform_class: Any = None
+        # 1. Check standard torchvision transforms
+        if hasattr(transforms, t_name):
+            transform_class = getattr(transforms, t_name)
+        # 2. Check custom transforms
+        elif t_name in TRANSFORM_REGISTRY:
+            transform_class = TRANSFORM_REGISTRY[t_name]
+        # 3. Not found
+        else:
+            _LOGGER.error(f"Unknown transform '{t_name}' in recipe. Not found in torchvision.transforms or TRANSFORM_REGISTRY.")
+            raise ValueError(f"Unknown transform name: {t_name}")
+        # Instantiate the transform
+        try:
+            pipeline_steps.append(transform_class(**t_kwargs))
+        except Exception as e:
+            _LOGGER.error(f"Failed to instantiate transform '{t_name}' with kwargs {t_kwargs}: {e}")
+            raise
+    return transforms.Compose(pipeline_steps)
+def _save_recipe(recipe: Dict[str, Any], filepath: Path) -> None:
+    """
+    Saves a transform recipe dictionary to a JSON file.
+    Args:
+        recipe (Dict[str, Any]): The recipe dictionary to save.
+        filepath (str): The path to the output .json file.
+    """
+    final_filepath = filepath.with_suffix(".json")
+    try:
+        with open(final_filepath, 'w') as f:
+            json.dump(recipe, f, indent=4)
+        _LOGGER.info(f"Transform recipe saved as '{final_filepath.name}'.")
+    except Exception as e:
+        _LOGGER.error(f"Failed to save recipe to '{final_filepath}': {e}")
+        raise
+def _load_recipe_and_build_transform(filepath: Union[str,Path]) -> transforms.Compose:
+    """
+    Loads a transform recipe from a .json file and reconstructs the
+    torchvision.transforms.Compose pipeline.
+    Args:
+        filepath (str): Path to the saved transform recipe .json file.
+    Returns:
+        transforms.Compose: The reconstructed transformation pipeline.
+    Raises:
+        ValueError: If a transform name in the recipe is not found in
+                    torchvision.transforms or the custom TRANSFORM_REGISTRY.
+    """
+    # validate filepath
+    final_filepath = make_fullpath(filepath, enforce="file")
+    try:
+        with open(final_filepath, 'r') as f:
+            recipe = json.load(f)
+    except Exception as e:
+        _LOGGER.error(f"Failed to load recipe from '{final_filepath}': {e}")
+        raise
+    pipeline_steps: List[Callable] = []
+    if VisionTransformRecipeKeys.PIPELINE not in recipe:
+        _LOGGER.error("Recipe file is invalid: missing 'pipeline' key.")
+        raise ValueError("Invalid recipe format.")
+    for step in recipe[VisionTransformRecipeKeys.PIPELINE]:
+        t_name = step[VisionTransformRecipeKeys.NAME]
+        t_kwargs = step[VisionTransformRecipeKeys.KWARGS]
+        transform_class: Any = None
+        # 1. Check standard torchvision transforms
+        if hasattr(transforms, t_name):
+            transform_class = getattr(transforms, t_name)
+        # 2. Check custom transforms
+        elif t_name in TRANSFORM_REGISTRY:
+            transform_class = TRANSFORM_REGISTRY[t_name]
+        # 3. Not found
+        else:
+            _LOGGER.error(f"Unknown transform '{t_name}' in recipe. Not found in torchvision.transforms or TRANSFORM_REGISTRY.")
+            raise ValueError(f"Unknown transform name: {t_name}")
+        # Instantiate the transform
+        try:
+            pipeline_steps.append(transform_class(**t_kwargs))
+        except Exception as e:
+            _LOGGER.error(f"Failed to instantiate transform '{t_name}' with kwargs {t_kwargs}: {e}")
+            raise
+    _LOGGER.info(f"Successfully loaded and built transform pipeline from '{final_filepath.name}'.")
+    return transforms.Compose(pipeline_steps)
 def info():
     _script_info(__all__)

ml_tools/PSO_optimization.py CHANGED Viewed

@@ -12,9 +12,9 @@ from .serde import deserialize_object
 from .math_utilities import threshold_binary_values, threshold_binary_values_batch
 from .path_manager import sanitize_filename, make_fullpath, list_files_by_extension
 from ._logger import _LOGGER
-from .keys import EnsembleKeys
+from ._keys import EnsembleKeys
 from ._script_info import _script_info
-from .SQL import DatabaseManager
+from .SQL import DragonSQL
 from .optimization_tools import _save_result
 """
@@ -191,7 +191,7 @@ def _set_feature_names(size: int, names: Union[list[str], None]):
         return names
-def _run_single_pso(objective_function: ObjectiveFunction, pso_args: dict, feature_names: list[str], target_name: str, random_state: int, save_format: Literal['csv', 'sqlite', 'both'], csv_path: Path, db_manager: Optional[DatabaseManager], db_table_name: str):
+def _run_single_pso(objective_function: ObjectiveFunction, pso_args: dict, feature_names: list[str], target_name: str, random_state: int, save_format: Literal['csv', 'sqlite', 'both'], csv_path: Path, db_manager: Optional[DragonSQL], db_table_name: str):
     """Helper for a single PSO run that also handles saving."""
     pso_args.update({"seed": random_state})
@@ -213,7 +213,7 @@ def _run_single_pso(objective_function: ObjectiveFunction, pso_args: dict, featu
     return best_features_named, best_target_named
-def _run_post_hoc_pso(objective_function: ObjectiveFunction, pso_args: dict, feature_names: list[str], target_name: str, repetitions: int, save_format: Literal['csv', 'sqlite', 'both'], csv_path: Path, db_manager: Optional[DatabaseManager], db_table_name: str):
+def _run_post_hoc_pso(objective_function: ObjectiveFunction, pso_args: dict, feature_names: list[str], target_name: str, repetitions: int, save_format: Literal['csv', 'sqlite', 'both'], csv_path: Path, db_manager: Optional[DragonSQL], db_table_name: str):
     """Helper for post-hoc analysis that saves results incrementally."""
     progress = trange(repetitions, desc="Post-Hoc PSO", unit="run")
     for _ in progress:
@@ -342,7 +342,7 @@ def run_pso(lower_boundaries: list[float],
         schema = {"result_id": "INTEGER PRIMARY KEY AUTOINCREMENT", **schema}
         # Create table
-        with DatabaseManager(db_path) as db:
+        with DragonSQL(db_path) as db:
             db.create_table(db_table_name, schema)
     pso_arguments = {
@@ -357,7 +357,7 @@ def run_pso(lower_boundaries: list[float],
     # --- Dispatcher ---
     # Use a real or dummy context manager to handle the DB connection cleanly
-    db_context = DatabaseManager(db_path) if save_format in ['sqlite', 'both'] else nullcontext()
+    db_context = DragonSQL(db_path) if save_format in ['sqlite', 'both'] else nullcontext()
     with db_context as db_manager:
         if post_hoc_analysis is None or post_hoc_analysis <= 1:

ml_tools/SQL.py CHANGED Viewed

@@ -9,11 +9,11 @@ from .path_manager import make_fullpath, sanitize_filename
 __all__ = [
-    "DatabaseManager",
+    "DragonSQL",
 ]
-class DatabaseManager:
+class DragonSQL:
     """
     A user-friendly context manager for handling SQLite database operations.
@@ -35,7 +35,7 @@ class DatabaseManager:
     ...     "feature_a": "REAL",
     ...     "score": "REAL"
     ... }
-    >>> with DatabaseManager("my_results.db") as db:
+    >>> with DragonSQL("my_results.db") as db:
     ...     db.create_table("experiments", schema)
     ...     data = {"run_name": "first_run", "feature_a": 0.123, "score": 95.5}
     ...     db.insert_row("experiments", data)
@@ -43,7 +43,7 @@ class DatabaseManager:
     ...     print(df)
     """
     def __init__(self, db_path: Union[str, Path]):
-        """Initializes the DatabaseManager with the path to the database file."""
+        """Initializes the DragonSQL with the path to the database file."""
         if isinstance(db_path, str):
             if not db_path.endswith(".db"):
                 db_path = db_path + ".db"

ml_tools/{keys.py → _keys.py} RENAMED Viewed

@@ -1,3 +1,10 @@
+class MagicWords:
+    """General purpose keys"""
+    LATEST = "latest"
+    CURRENT = "current"
+    RENAME = "rename"
 class PyTorchLogKeys:
     """
     Used internally for ML scripts module.
@@ -7,6 +14,7 @@ class PyTorchLogKeys:
     # --- Epoch Level ---
     TRAIN_LOSS = 'train_loss'
     VAL_LOSS = 'val_loss'
+    LEARNING_RATE = 'lr'
     # --- Batch Level ---
     BATCH_LOSS = 'loss'
@@ -79,6 +87,13 @@ class PyTorchCheckpointKeys:
     SCHEDULER_STATE = "scheduler_state_dict"
     EPOCH = "epoch"
     BEST_SCORE = "best_score"
+    HISTORY = "history"
+    CHECKPOINT_NAME = "PyModelCheckpoint"
+    # Finalized config
+    CLASSIFICATION_THRESHOLD = "classification_threshold"
+    CLASS_MAP = "class_map"
+    SEQUENCE_LENGTH = "sequence_length"
+    INITIAL_SEQUENCE = "initial_sequence"
 class UtilityKeys:
@@ -104,8 +119,9 @@ class VisionTransformRecipeKeys:
     TASK = "task"
     PIPELINE = "pipeline"
     NAME = "name"
-    KWARGS = "_kwargs"
+    KWARGS = "kwargs"
     PRE_TRANSFORMS = "pre_transforms"
     RESIZE_SIZE = "resize_size"
     CROP_SIZE = "crop_size"
     MEAN = "mean"
@@ -118,6 +134,34 @@ class ObjectDetectionKeys:
     LABELS = "labels"
+class MLTaskKeys:
+    """Used by the Trainer and InferenceHandlers"""
+    REGRESSION = "regression"
+    MULTITARGET_REGRESSION = "multitarget regression"
+    BINARY_CLASSIFICATION = "binary classification"
+    MULTICLASS_CLASSIFICATION = "multiclass classification"
+    MULTILABEL_BINARY_CLASSIFICATION = "multilabel binary classification"
+    BINARY_IMAGE_CLASSIFICATION = "binary image classification"
+    MULTICLASS_IMAGE_CLASSIFICATION = "multiclass image classification"
+    BINARY_SEGMENTATION = "binary segmentation"
+    MULTICLASS_SEGMENTATION = "multiclass segmentation"
+    OBJECT_DETECTION = "object detection"
+    SEQUENCE_SEQUENCE = "sequence-to-sequence"
+    SEQUENCE_VALUE = "sequence-to-value"
+    ALL_BINARY_TASKS = [BINARY_CLASSIFICATION, MULTILABEL_BINARY_CLASSIFICATION, BINARY_IMAGE_CLASSIFICATION, BINARY_SEGMENTATION]
+class DragonTrainerKeys:
+    VALIDATION_METRICS_DIR = "Validation_Metrics"
+    TEST_METRICS_DIR = "Test_Metrics"
 class _OneHotOtherPlaceholder:
     """Used internally by GUI_tools."""
     OTHER_GUI = "OTHER"

ml_tools/_schema.py CHANGED Viewed

@@ -2,7 +2,7 @@ from typing import NamedTuple, Tuple, Optional, Dict, Union
 from pathlib import Path
 from .custom_logger import save_list_strings
-from .keys import DatasetKeys
+from ._keys import DatasetKeys
 from ._logger import _LOGGER

ml_tools/ensemble_evaluation.py CHANGED Viewed

@@ -25,7 +25,7 @@ from typing import Union, Optional, Literal
 from .path_manager import sanitize_filename, make_fullpath
 from ._script_info import _script_info
 from ._logger import _LOGGER
-from .keys import SHAPKeys
+from ._keys import SHAPKeys
 __all__ = [
@@ -112,7 +112,7 @@ def evaluate_model_classification(
         report_df = pd.DataFrame(report_dict).iloc[:-1, :].T
         plt.figure(figsize=figsize)
         sns.heatmap(report_df, annot=True, cmap=heatmap_cmap, fmt='.2f',
-                    annot_kws={"size": base_fontsize - 4})
+                    annot_kws={"size": base_fontsize - 4}, vmin=0.0, vmax=1.0)
         plt.title(f"{model_name} - {target_name}", fontsize=base_fontsize)
         plt.xticks(fontsize=base_fontsize - 2)
         plt.yticks(fontsize=base_fontsize - 2)
@@ -133,6 +133,7 @@ def evaluate_model_classification(
         normalize="true",
         ax=ax
     )
+    disp.im_.set_clim(vmin=0.0, vmax=1.0)
     ax.set_title(f"{model_name} - {target_name}", fontsize=base_fontsize)
     ax.tick_params(axis='both', labelsize=base_fontsize)
@@ -327,7 +328,8 @@ def plot_calibration_curve(
     target_name: str,
     figure_size: tuple = (10, 10),
     base_fontsize: int = 24,
-    n_bins: int = 15
+    n_bins: int = 15,
+    line_color: str = 'darkorange'
 ) -> plt.Figure: # type: ignore
     """
     Plots the calibration curve (reliability diagram) for a classifier.
@@ -348,22 +350,63 @@ def plot_calibration_curve(
     """
     fig, ax = plt.subplots(figsize=figure_size)
-    disp = CalibrationDisplay.from_estimator(
-        model,
-        x_test,
-        y_test,
-        n_bins=n_bins,
-        ax=ax
+    # --- Step 1: Get probabilities from the estimator ---
+    # We do this manually so we can pass them to from_predictions
+    try:
+        y_prob = model.predict_proba(x_test)
+        # Use probabilities for the positive class (assuming binary)
+        y_score = y_prob[:, 1]
+    except Exception as e:
+        _LOGGER.error(f"Could not get probabilities from model: {e}")
+        plt.close(fig)
+        return fig # Return empty figure
+    # --- Step 2: Get binned data *without* plotting ---
+    with plt.ioff():
+        fig_temp, ax_temp = plt.subplots()
+        cal_display_temp = CalibrationDisplay.from_predictions(
+            y_test,
+            y_score,
+            n_bins=n_bins,
+            ax=ax_temp,
+            name="temp"
+        )
+        line_x, line_y = cal_display_temp.line_.get_data() # type: ignore
+        plt.close(fig_temp)
+    # --- Step 3: Build the plot from scratch on ax ---
+    # 3a. Plot the ideal diagonal line
+    ax.plot([0, 1], [0, 1], 'k--', label='Perfectly calibrated')
+    # 3b. Use regplot for the regression line and its CI
+    sns.regplot(
+        x=line_x,
+        y=line_y,
+        ax=ax,
+        scatter=False,  # No scatter dots
+        label=f"Calibration Curve ({n_bins} bins)",
+        line_kws={
+            'color': line_color,
+            'linestyle': '--',
+            'linewidth': 2
+        }
     )
+    # --- Step 4: Apply original formatting ---
     ax.set_title(f"{model_name} - Reliability Curve for {target_name}", fontsize=base_fontsize)
     ax.tick_params(axis='both', labelsize=base_fontsize - 2)
     ax.set_xlabel("Mean Predicted Probability", fontsize=base_fontsize)
     ax.set_ylabel("Fraction of Positives", fontsize=base_fontsize)
-    ax.legend(fontsize=base_fontsize - 4)
+    # Set limits
+    ax.set_ylim(0.0, 1.0)
+    ax.set_xlim(0.0, 1.0)
+    ax.legend(fontsize=base_fontsize - 4, loc='lower right')
     fig.tight_layout()
-    # Save figure
+    # --- Step 5: Save figure (using original logic) ---
     save_path = make_fullpath(save_dir, make=True)
     sanitized_target_name = sanitize_filename(target_name)
     full_save_path = save_path / f"Calibration_Plot_{sanitized_target_name}.svg"

ml_tools/ensemble_inference.py CHANGED Viewed

@@ -1,7 +1,6 @@
 from typing import Union, Literal, Dict, Any, Optional, List
 from pathlib import Path
 import json
-import joblib
 import numpy as np
 # Inference models
 import xgboost
@@ -10,16 +9,17 @@ import lightgbm
 from ._script_info import _script_info
 from ._logger import _LOGGER
 from .path_manager import make_fullpath, list_files_by_extension
-from .keys import EnsembleKeys
+from ._keys import EnsembleKeys
+from .serde import deserialize_object
 __all__ = [
-    "InferenceHandler",
+    "DragonEnsembleInferenceHandler",
     "model_report"
 ]
-class InferenceHandler:
+class DragonEnsembleInferenceHandler:
     """
     Handles loading ensemble models and performing inference for either regression or classification tasks.
     """
@@ -44,9 +44,9 @@ class InferenceHandler:
         for fname, fpath in model_files.items():
             try:
                 full_object: dict
-                full_object = _deserialize_object(filepath=fpath,
+                full_object = deserialize_object(filepath=fpath,
                                                  verbose=self.verbose,
-                                                 raise_on_error=True) # type: ignore
+                                                 expected_type=dict)
                 model: Any = full_object[EnsembleKeys.MODEL]
                 target_name: str = full_object[EnsembleKeys.TARGET]
@@ -170,7 +170,7 @@ def model_report(
     # --- 2. Deserialize and Extract Info ---
     try:
-        full_object: dict = _deserialize_object(model_p) # type: ignore
+        full_object: dict = deserialize_object(model_p, expected_type=dict, verbose=verbose) # type: ignore
         model = full_object[EnsembleKeys.MODEL]
         target = full_object[EnsembleKeys.TARGET]
         features = full_object[EnsembleKeys.FEATURES]
@@ -218,31 +218,5 @@ def model_report(
     return report_data
-# Local implementation to avoid calling utilities dependencies
-def _deserialize_object(filepath: Union[str,Path], verbose: bool=True, raise_on_error: bool=True) -> Optional[Any]:
-    """
-    Loads a serialized object from a .joblib file.
-    Parameters:
-        filepath (str | Path): Full path to the serialized .joblib file.
-    Returns:
-        (Any | None): The deserialized Python object, or None if loading fails.
-    """
-    true_filepath = make_fullpath(filepath)
-    try:
-        obj = joblib.load(true_filepath)
-    except (IOError, OSError, EOFError, TypeError, ValueError) as e:
-        _LOGGER.error(f"Failed to deserialize object from '{true_filepath}'.")
-        if raise_on_error:
-            raise e
-        return None
-    else:
-        if verbose:
-            _LOGGER.info(f"Loaded object of type '{type(obj)}'")
-        return obj
 def info():
     _script_info(__all__)

ml_tools/ensemble_learning.py CHANGED Viewed

@@ -17,7 +17,7 @@ from .utilities import yield_dataframes_from_dir, train_dataset_yielder
 from .serde import serialize_object_filename
 from .path_manager import sanitize_filename, make_fullpath
 from ._script_info import _script_info
-from .keys import EnsembleKeys
+from ._keys import EnsembleKeys
 from ._logger import _LOGGER
 from .ensemble_evaluation import (evaluate_model_classification,
                                   plot_roc_curve,

ml_tools/optimization_tools.py CHANGED Viewed

@@ -8,7 +8,7 @@ from .path_manager import make_fullpath, list_csv_paths, sanitize_filename
 from .utilities import yield_dataframes_from_dir
 from ._logger import _LOGGER
 from ._script_info import _script_info
-from .SQL import DatabaseManager
+from .SQL import DragonSQL
 from ._schema import FeatureSchema
@@ -262,7 +262,7 @@ def _save_result(
         result_dict: dict,
         save_format: Literal['csv', 'sqlite', 'both'],
         csv_path: Path,
-        db_manager: Optional[DatabaseManager] = None,
+        db_manager: Optional[DragonSQL] = None,
         db_table_name: Optional[str] = None,
         categorical_mappings: Optional[Dict[str, Dict[str, int]]] = None
     ):

dragon-ml-toolbox 14.3.1__py3-none-any.whl → 16.0.0__py3-none-any.whl

Potentially problematic release.

dragon-ml-toolbox 14.3.1__py3-none-any.whl → 16.0.0__py3-none-any.whl