dragon-ml-toolbox 14.3.1__py3-none-any.whl → 14.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of dragon-ml-toolbox might be problematic.

@@ -34,6 +34,8 @@ __all__ = [
     "multi_target_shap_summary_plot",
 ]
 
+DPI_value = 250
+
 
 def multi_target_regression_metrics(
     y_true: np.ndarray,
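The new module-level DPI_value constant replaces the hard-coded dpi=100 in every figure below, raising saved plots from 100 to 250 dpi. Because it is a constant rather than a function parameter, a caller who wants a different resolution would override it on the module before calling the metric functions; a minimal sketch, with the module name assumed for illustration (this diff does not show which ml_tools module the hunk belongs to):

    # Hypothetical import: the module name is an assumption, not shown in this diff.
    from ml_tools import ML_evaluation_multi

    ML_evaluation_multi.DPI_value = 300  # e.g. print-quality figures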
@@ -90,7 +92,7 @@ def multi_target_regression_metrics(
 
         # --- Save Residual Plot ---
         residuals = true_i - pred_i
-        fig_res, ax_res = plt.subplots(figsize=(8, 6), dpi=100)
+        fig_res, ax_res = plt.subplots(figsize=(8, 6), dpi=DPI_value)
         ax_res.scatter(pred_i, residuals, alpha=0.6, edgecolors='k', s=50)
         ax_res.axhline(0, color='red', linestyle='--')
         ax_res.set_xlabel("Predicted Values")
@@ -103,7 +105,7 @@ def multi_target_regression_metrics(
         plt.close(fig_res)
 
         # --- Save True vs. Predicted Plot ---
-        fig_tvp, ax_tvp = plt.subplots(figsize=(8, 6), dpi=100)
+        fig_tvp, ax_tvp = plt.subplots(figsize=(8, 6), dpi=DPI_value)
         ax_tvp.scatter(true_i, pred_i, alpha=0.6, edgecolors='k', s=50)
         ax_tvp.plot([true_i.min(), true_i.max()], [true_i.min(), true_i.max()], 'k--', lw=2)
         ax_tvp.set_xlabel('True Values')
@@ -127,7 +129,10 @@ def multi_label_classification_metrics(
     y_prob: np.ndarray,
     target_names: List[str],
     save_dir: Union[str, Path],
-    threshold: float = 0.5
+    threshold: float = 0.5,
+    ROC_PR_line: str='darkorange',
+    cmap: str = "Blues",
+    font_size: int = 16
     ):
     """
     Calculates and saves classification metrics for each label individually.
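With the widened signature, per-label plot styling is now set at the call site instead of being hard-coded. A minimal usage sketch with illustrative values (data shapes follow the existing behavior: one column per label; the import of multi_label_classification_metrics is omitted because its module path is not shown in this diff):

    import numpy as np

    # Illustrative data only: 3 labels, random probabilities.
    rng = np.random.default_rng(0)
    y_true = rng.integers(0, 2, size=(200, 3))
    y_prob = rng.random(size=(200, 3))

    multi_label_classification_metrics(
        y_true=y_true,
        y_prob=y_prob,
        target_names=["label_a", "label_b", "label_c"],
        save_dir="reports/multilabel",
        threshold=0.5,
        ROC_PR_line="crimson",  # color of the ROC and PR curve lines
        cmap="Greens",          # confusion-matrix colormap
        font_size=14,           # applied via plt.rcParams while plotting
    )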
@@ -158,6 +163,10 @@ def multi_label_classification_metrics(
 
     # Generate binary predictions from probabilities
     y_pred = (y_prob >= threshold).astype(int)
+
+    # --- Save current RC params and update font size ---
+    original_rc_params = plt.rcParams.copy()
+    plt.rcParams.update({'font.size': font_size})
 
     _LOGGER.info("--- Multi-Label Classification Evaluation ---")
 
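The font size is applied by mutating the global plt.rcParams and restoring the saved copy at the end of the function (last hunk of this file). An equivalent, exception-safe alternative is matplotlib's rc_context manager, sketched here for comparison rather than as the library's implementation:

    import matplotlib.pyplot as plt

    # plt.rc_context restores the previous rcParams automatically,
    # even if an exception is raised while plotting.
    with plt.rc_context({'font.size': font_size}):
        fig, ax = plt.subplots(figsize=(6, 6), dpi=DPI_value)
        ...  # plotting code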
@@ -174,7 +183,7 @@ def multi_label_classification_metrics(
         f"Jaccard Score (macro): {j_score_macro:.4f}\n"
         f"--------------------------------------------------\n"
     )
-    print(overall_report)
+    # print(overall_report)
     overall_report_path = save_dir_path / "classification_report_overall.txt"
     overall_report_path.write_text(overall_report)
 
@@ -192,8 +201,26 @@ def multi_label_classification_metrics(
         report_path.write_text(report_text) # type: ignore
 
         # --- Save Confusion Matrix ---
-        fig_cm, ax_cm = plt.subplots(figsize=(6, 6), dpi=100)
-        ConfusionMatrixDisplay.from_predictions(true_i, pred_i, cmap="Blues", ax=ax_cm)
+        fig_cm, ax_cm = plt.subplots(figsize=(6, 6), dpi=DPI_value)
+        disp_ = ConfusionMatrixDisplay.from_predictions(true_i,
+                                                        pred_i,
+                                                        cmap=cmap,
+                                                        ax=ax_cm,
+                                                        normalize='true',
+                                                        labels=[0, 1],
+                                                        display_labels=["Negative", "Positive"])
+
+        disp_.im_.set_clim(vmin=0.0, vmax=1.0)
+
+        # Turn off gridlines
+        ax_cm.grid(False)
+
+        # Manually update font size of cell texts
+        for text in ax_cm.texts:
+            text.set_fontsize(font_size)
+
+        fig_cm.tight_layout()
+
         ax_cm.set_title(f"Confusion Matrix for '{name}'")
         cm_path = save_dir_path / f"confusion_matrix_{sanitized_name}.svg"
         plt.savefig(cm_path)
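The rewritten confusion-matrix block changes more than styling: normalize='true' makes each row sum to 1 (cells show per-class recall instead of raw counts), disp_.im_.set_clim(0.0, 1.0) pins the color scale so matrices are visually comparable across labels, and labels/display_labels fix the binary Negative/Positive ordering even when one class is absent from a label's predictions.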
@@ -202,8 +229,8 @@ def multi_label_classification_metrics(
         # --- Save ROC Curve ---
         fpr, tpr, _ = roc_curve(true_i, prob_i)
         auc = roc_auc_score(true_i, prob_i)
-        fig_roc, ax_roc = plt.subplots(figsize=(6, 6), dpi=100)
-        ax_roc.plot(fpr, tpr, label=f'AUC = {auc:.2f}')
+        fig_roc, ax_roc = plt.subplots(figsize=(6, 6), dpi=DPI_value)
+        ax_roc.plot(fpr, tpr, label=f'AUC = {auc:.2f}', color=ROC_PR_line)
         ax_roc.plot([0, 1], [0, 1], 'k--')
         ax_roc.set_title(f'ROC Curve for "{name}"')
         ax_roc.set_xlabel('False Positive Rate'); ax_roc.set_ylabel('True Positive Rate')
@@ -215,14 +242,17 @@ def multi_label_classification_metrics(
         # --- Save Precision-Recall Curve ---
         precision, recall, _ = precision_recall_curve(true_i, prob_i)
         ap_score = average_precision_score(true_i, prob_i)
-        fig_pr, ax_pr = plt.subplots(figsize=(6, 6), dpi=100)
-        ax_pr.plot(recall, precision, label=f'AP = {ap_score:.2f}')
+        fig_pr, ax_pr = plt.subplots(figsize=(6, 6), dpi=DPI_value)
+        ax_pr.plot(recall, precision, label=f'AP = {ap_score:.2f}', color=ROC_PR_line)
         ax_pr.set_title(f'Precision-Recall Curve for "{name}"')
         ax_pr.set_xlabel('Recall'); ax_pr.set_ylabel('Precision')
         ax_pr.legend(loc='lower left'); ax_pr.grid(True, linestyle='--', alpha=0.6)
         pr_path = save_dir_path / f"pr_curve_{sanitized_name}.svg"
         plt.savefig(pr_path)
         plt.close(fig_pr)
+
+    # restore RC params
+    plt.rcParams.update(original_rc_params)
 
     _LOGGER.info(f"All individual label reports and plots saved to '{save_dir_path.name}'")
 
ml_tools/ML_trainer.py CHANGED
@@ -13,11 +13,12 @@ from .keys import PyTorchLogKeys, PyTorchCheckpointKeys, DatasetKeys
 from ._logger import _LOGGER
 from .path_manager import make_fullpath
 from .ML_vision_evaluation import segmentation_metrics, object_detection_metrics
+from .ML_configuration import ClassificationMetricsFormat, MultiClassificationMetricsFormat
 
 
 __all__ = [
     "MLTrainer",
-    "ObjectDetectionTrainer"
+    "ObjectDetectionTrainer",
 ]
 
 
@@ -334,14 +335,16 @@ class MLTrainer:
 
             yield y_pred_batch, y_prob_batch, y_true_batch
 
-    def evaluate(self, save_dir: Union[str, Path], data: Optional[Union[DataLoader, Dataset]] = None, classification_threshold: float = 0.5):
+    def evaluate(self,
+                 save_dir: Union[str, Path],
+                 data: Optional[Union[DataLoader, Dataset]] = None,
+                 format_configuration: Optional[Union[ClassificationMetricsFormat, MultiClassificationMetricsFormat]]=None):
         """
         Evaluates the model, routing to the correct evaluation function based on task `kind`.
 
         Args:
             save_dir (str | Path): Directory to save all reports and plots.
             data (DataLoader | Dataset | None): The data to evaluate on. If None, defaults to the trainer's internal test_dataset.
-            classification_threshold (float): Probability threshold for multi-label tasks.
         """
         dataset_for_names = None
         eval_loader = None
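Note that the classification_threshold argument is gone from evaluate(): the prediction threshold for multi-label tasks now travels inside the MultiClassificationMetricsFormat configuration object (see the dispatch hunks below), and _predict_for_eval is now called without it.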
@@ -376,10 +379,10 @@ class MLTrainer:
             _LOGGER.error("Cannot evaluate. No valid data was provided or found.")
             raise ValueError()
 
-        print("\n--- Model Evaluation ---")
+        # print("\n--- Model Evaluation ---")
 
         all_preds, all_probs, all_true = [], [], []
-        for y_pred_b, y_prob_b, y_true_b in self._predict_for_eval(eval_loader, classification_threshold):
+        for y_pred_b, y_prob_b, y_true_b in self._predict_for_eval(eval_loader):
             if y_pred_b is not None: all_preds.append(y_pred_b)
             if y_prob_b is not None: all_probs.append(y_prob_b)
             if y_true_b is not None: all_true.append(y_true_b)
@@ -397,7 +400,19 @@ class MLTrainer:
             regression_metrics(y_true.flatten(), y_pred.flatten(), save_dir)
 
         elif self.kind == "classification":
-            classification_metrics(save_dir, y_true, y_pred, y_prob)
+            # Parse configuration
+            if format_configuration and isinstance(format_configuration, ClassificationMetricsFormat):
+                classification_metrics(save_dir=save_dir,
+                                       y_true=y_true,
+                                       y_pred=y_pred,
+                                       y_prob=y_prob,
+                                       cmap=format_configuration.cmap,
+                                       class_map=format_configuration.class_map,
+                                       ROC_PR_line=format_configuration.ROC_PR_line,
+                                       calibration_bins=format_configuration.calibration_bins,
+                                       font_size=format_configuration.font_size)
+            else:
+                classification_metrics(save_dir, y_true, y_pred, y_prob)
 
         elif self.kind == "multi_target_regression":
             try:
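A usage sketch of the new configuration-driven dispatch, assuming ClassificationMetricsFormat accepts these fields as keyword arguments (its constructor lives in ml_tools/ML_configuration.py and is not part of this diff; values are illustrative):

    from ml_tools.ML_configuration import ClassificationMetricsFormat

    # Only the attributes read above (cmap, class_map, ROC_PR_line,
    # calibration_bins, font_size) are known from this diff; the constructor
    # call below is an assumption.
    fmt = ClassificationMetricsFormat(
        cmap="Blues",
        class_map={0: "negative", 1: "positive"},  # illustrative mapping
        ROC_PR_line="darkorange",
        calibration_bins=15,
        font_size=16,
    )

    trainer.evaluate(save_dir="reports/classification", format_configuration=fmt)

The multi-label branch in the next hunk follows the same pattern with MultiClassificationMetricsFormat, which additionally carries the probability threshold.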
@@ -419,7 +434,18 @@ class MLTrainer:
             if y_prob is None:
                 _LOGGER.error("Evaluation for multi_label_classification requires probabilities (y_prob).")
                 return
-            multi_label_classification_metrics(y_true, y_prob, target_names, save_dir, classification_threshold)
+
+            if format_configuration and isinstance(format_configuration, MultiClassificationMetricsFormat):
+                multi_label_classification_metrics(y_true=y_true,
+                                                   y_prob=y_prob,
+                                                   target_names=target_names,
+                                                   save_dir=save_dir,
+                                                   threshold=format_configuration.threshold,
+                                                   ROC_PR_line=format_configuration.ROC_PR_line,
+                                                   cmap=format_configuration.cmap,
+                                                   font_size=format_configuration.font_size)
+            else:
+                multi_label_classification_metrics(y_true, y_prob, target_names, save_dir)
 
         elif self.kind == "segmentation":
             class_names = None
@@ -445,7 +471,7 @@ class MLTrainer:
 
             segmentation_metrics(y_true, y_pred, save_dir, class_names=class_names)
 
-        print("\n--- Training History ---")
+        # print("\n--- Training History ---")
         plot_losses(self.history, save_dir=save_dir)
 
     def explain(self,
@@ -502,7 +528,7 @@ class MLTrainer:
             rand_indices = torch.randperm(full_data.size(0))[:num_samples]
             return full_data[rand_indices]
 
-        print(f"\n--- Preparing SHAP Data (sampling up to {n_samples} instances) ---")
+        # print(f"\n--- Preparing SHAP Data (sampling up to {n_samples} instances) ---")
 
         # 1. Get background data from the trainer's train_dataset
         background_data = _get_random_sample(self.train_dataset, n_samples)
@@ -610,7 +636,7 @@ class MLTrainer:
             plot_n_features (int): Number of top features to plot.
         """
 
-        print("\n--- Attention Analysis ---")
+        # print("\n--- Attention Analysis ---")
 
         # --- Step 1: Check if the model supports this explanation ---
         if not getattr(self.model, 'has_interpretable_attention', False):
@@ -994,7 +1020,7 @@ class ObjectDetectionTrainer:
             _LOGGER.error("Cannot evaluate. No valid data was provided or found.")
             raise ValueError()
 
-        print("\n--- Model Evaluation ---")
+        # print("\n--- Model Evaluation ---")
 
         all_predictions = []
         all_targets = []
@@ -1043,7 +1069,7 @@ class ObjectDetectionTrainer:
             print_output=False
         )
 
-        print("\n--- Training History ---")
+        # print("\n--- Training History ---")
         plot_losses(self.history, save_dir=save_dir)
 
     def _callbacks_hook(self, method_name: str, *args, **kwargs):
ml_tools/ML_utilities.py CHANGED
@@ -10,6 +10,7 @@ from ._logger import _LOGGER
 from .keys import DatasetKeys, PytorchModelArchitectureKeys, PytorchArtifactPathKeys, SHAPKeys, UtilityKeys, PyTorchCheckpointKeys
 from .utilities import load_dataframe
 from .custom_logger import save_list_strings, custom_logger
+from .serde import serialize_object_filename
 
 
 __all__ = [
@@ -18,7 +19,8 @@ __all__ = [
     "get_model_parameters",
     "inspect_model_architecture",
     "inspect_pth_file",
-    "set_parameter_requires_grad"
+    "set_parameter_requires_grad",
+    "save_pretrained_transforms"
 ]
 
 
@@ -524,5 +526,52 @@ def _set_params_grad(
     return params_changed
 
 
+def save_pretrained_transforms(model: nn.Module, output_dir: Union[str, Path]):
+    """
+    Checks a model for the 'self._pretrained_default_transforms' attribute, if found,
+    serializes the returned transform object as a .joblib file.
+
+    This saves the callable transform object itself for
+    later use, such as passing it directly to the 'transform_source'
+    argument of the PyTorchVisionInferenceHandler.
+
+    Args:
+        model (nn.Module): The model instance to check.
+        output_dir (str | Path): The directory where the transform file will be saved.
+    """
+    output_filename = "pretrained_model_transformations"
+
+    # 1. Check for the "secret attribute"
+    if not hasattr(model, '_pretrained_default_transforms'):
+        _LOGGER.warning(f"Model of type {type(model).__name__} does not have the required attribute. No transformations saved.")
+        return
+
+    # 2. Get the transform object
+    try:
+        transform_obj = model._pretrained_default_transforms
+    except Exception as e:
+        _LOGGER.error(f"Error calling the required attribute on model: {e}")
+        return
+
+    # 3. Check if the object is actually there
+    if transform_obj is None:
+        _LOGGER.warning(f"Model {type(model).__name__} has the required attribute but returned None. No transforms saved.")
+        return
+
+    # 4. Serialize and save using serde
+    try:
+        serialize_object_filename(
+            obj=transform_obj,
+            save_dir=output_dir,
+            filename=output_filename,
+            verbose=True,
+            raise_on_error=True
+        )
+        # _LOGGER.info(f"Successfully saved pretrained transforms to '{output_dir}'.")
+    except Exception as e:
+        _LOGGER.error(f"Failed to serialize transformations: {e}")
+        raise
+
+
 def info():
     _script_info(__all__)
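The new save_pretrained_transforms helper rounds out the inference workflow for vision models whose wrapper stores the pretrained preprocessing pipeline on _pretrained_default_transforms. A minimal usage sketch (the model construction is illustrative; any nn.Module exposing that attribute works):

    from ml_tools.ML_utilities import save_pretrained_transforms

    model = ...  # a toolbox vision wrapper that sets `_pretrained_default_transforms`
    save_pretrained_transforms(model, output_dir="artifacts")
    # Per the docstring, this serializes the transform object (joblib) under 'artifacts',
    # ready to be passed as the 'transform_source' of PyTorchVisionInferenceHandler.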