dragon-ml-toolbox 2.3.0__py3-none-any.whl → 3.0.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only.
- {dragon_ml_toolbox-2.3.0.dist-info → dragon_ml_toolbox-3.0.0.dist-info}/METADATA +26 -9
- dragon_ml_toolbox-3.0.0.dist-info/RECORD +25 -0
- ml_tools/ETL_engineering.py +8 -7
- ml_tools/GUI_tools.py +495 -0
- ml_tools/MICE_imputation.py +8 -4
- ml_tools/ML_callbacks.py +341 -0
- ml_tools/ML_evaluation.py +255 -0
- ml_tools/ML_trainer.py +344 -0
- ml_tools/ML_tutorial.py +300 -0
- ml_tools/PSO_optimization.py +27 -20
- ml_tools/RNN_forecast.py +49 -0
- ml_tools/VIF_factor.py +6 -5
- ml_tools/datasetmaster.py +601 -527
- ml_tools/ensemble_learning.py +12 -9
- ml_tools/handle_excel.py +9 -10
- ml_tools/logger.py +45 -8
- ml_tools/utilities.py +18 -1
- dragon_ml_toolbox-2.3.0.dist-info/RECORD +0 -21
- ml_tools/trainer.py +0 -346
- ml_tools/vision_helpers.py +0 -231
- {dragon_ml_toolbox-2.3.0.dist-info → dragon_ml_toolbox-3.0.0.dist-info}/WHEEL +0 -0
- {dragon_ml_toolbox-2.3.0.dist-info → dragon_ml_toolbox-3.0.0.dist-info}/licenses/LICENSE +0 -0
- {dragon_ml_toolbox-2.3.0.dist-info → dragon_ml_toolbox-3.0.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +0 -0
- {dragon_ml_toolbox-2.3.0.dist-info → dragon_ml_toolbox-3.0.0.dist-info}/top_level.txt +0 -0
- /ml_tools/{pytorch_models.py → _pytorch_models.py} +0 -0
ml_tools/ensemble_learning.py
CHANGED

@@ -20,6 +20,7 @@ from sklearn.metrics import accuracy_score, classification_report, ConfusionMatr
 import shap
 
 from .utilities import yield_dataframes_from_dir, sanitize_filename, _script_info, serialize_object, make_fullpath
+from .logger import _LOGGER
 
 import warnings # Ignore warnings
 warnings.filterwarnings('ignore', category=DeprecationWarning)

@@ -438,16 +439,16 @@ def dataset_pipeline(df_features: pd.DataFrame, df_target: pd.Series, task: Task
     '''
     #DEBUG
     if debug:
-
+        _LOGGER.info(f"Split Dataframes Shapes - Features DF: {df_features.shape}, Target DF: {df_target.shape}")
         unique_values = df_target.unique() # Get unique values for the target column
-
+        _LOGGER.info(f"\tUnique values for '{df_target.name}': {unique_values}")
 
     #Train test split
     X_train, X_test, y_train, y_test = _split_data(features=df_features, target=df_target, test_size=test_size, random_state=random_state, task=task)
 
     #DEBUG
     if debug:
-
+        _LOGGER.info(f"Shapes after train test split - X_train: {X_train.shape}, y_train: {y_train.shape}, X_test: {X_test.shape}, y_test: {y_test.shape}")
 
 
     # Resample

@@ -458,7 +459,7 @@ def dataset_pipeline(df_features: pd.DataFrame, df_target: pd.Series, task: Task
 
     #DEBUG
     if debug:
-
+        _LOGGER.info(f"Shapes after resampling - X_train: {X_train_oversampled.shape}, y_train: {y_train_oversampled.shape}, X_test: {X_test.shape}, y_test: {y_test.shape}")
 
     return X_train_oversampled, y_train_oversampled, X_test, y_test
 

@@ -864,7 +865,7 @@ def train_test_pipeline(model, model_name: str, dataset_id: str, task: TaskType,
     print(f"\tTraining model: {model_name} for Target: {target_name}...")
     trained_model = _train_model(model=model, train_features=train_features, train_target=train_target)
     if debug:
-
+        _LOGGER.info(f"Trained model object: {type(trained_model)}")
     local_save_directory = _local_directories(model_name=model_name, dataset_id=dataset_id, save_dir=save_dir)
 
     if save_model:

@@ -885,11 +886,11 @@ def train_test_pipeline(model, model_name: str, dataset_id: str, task: TaskType,
     else:
         raise ValueError(f"Unrecognized task '{task}' for model training,")
     if debug:
-
+        _LOGGER.info(f"Predicted vector: {type(y_pred)} with shape: {y_pred.shape}")
 
     get_shap_values(model=trained_model, model_name=model_name, save_dir=local_save_directory,
                     features_to_explain=train_features, feature_names=feature_names, target_name=target_name, task=task)
-
+
     return trained_model, y_pred
 
 ###### 5. Execution ######

@@ -902,7 +903,7 @@ def run_ensemble_pipeline(datasets_dir: Union[str,Path], save_dir: Union[str,Pat
     elif isinstance(model_object, ClassificationTreeModels):
         task = "classification"
         if handle_classification_imbalance is None:
-
+            _LOGGER.warning("⚠️ No method to handle classification class imbalance has been selected. Datasets are assumed to be balanced.")
         elif handle_classification_imbalance == "by_model":
             model_object.use_model_balance = True
         else:

@@ -914,6 +915,7 @@ def run_ensemble_pipeline(datasets_dir: Union[str,Path], save_dir: Union[str,Pat
     datasets_path = make_fullpath(datasets_dir)
     save_path = make_fullpath(save_dir, make=True)
 
+    _LOGGER.info("Training starting...")
     #Yield imputed dataset
     for dataframe, dataframe_name in yield_dataframes_from_dir(datasets_path):
         #Yield features dataframe and target dataframe

@@ -931,7 +933,8 @@ def run_ensemble_pipeline(datasets_dir: Union[str,Path], save_dir: Union[str,Pat
                             test_features=X_test, test_target=y_test,
                             feature_names=feature_names,target_name=target_name,
                             debug=debug, save_dir=save_path, save_model=save_model)
-        print("
+        print("")
+    _LOGGER.info("✅ Training and evaluation complete.")
 
 
 def info():
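Every hunk above follows the same migration: bare debug `print` calls become calls on the shared module logger. A minimal sketch of the pattern, assuming a hypothetical helper `_report_shapes` (the helper name is illustrative; `_LOGGER` and the debug-gated message come from the diff):

```python
import pandas as pd

from ml_tools.logger import _LOGGER  # module-level logger introduced in 3.0.0

def _report_shapes(df_features: pd.DataFrame, debug: bool = False) -> None:
    """Illustrative stand-in for the debug blocks in dataset_pipeline."""
    if debug:
        # 2.3.0 used a bare print() here; 3.0.0 routes it through the logger
        _LOGGER.info(f"Features DF: {df_features.shape}")

_report_shapes(pd.DataFrame({"a": [1, 2, 3]}), debug=True)
```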
ml_tools/handle_excel.py
CHANGED

@@ -3,6 +3,7 @@ from openpyxl import load_workbook, Workbook
 import pandas as pd
 from typing import List, Optional, Union
 from .utilities import _script_info, sanitize_filename, make_fullpath
+from .logger import _LOGGER
 
 
 __all__ = [

@@ -95,10 +96,9 @@ def unmerge_and_split_excel(filepath: Union[str,Path]) -> None:
         output_path = base_dir / output_filename
         new_wb.save(output_path)
 
-        # print(f"Saved: {output_path}")
         total_output_files += 1
 
-
+    _LOGGER.info(f"✅ Processed file: {file_path} into {total_output_files} output file(s).")
     return None
 
 

@@ -152,10 +152,9 @@ def unmerge_and_split_from_directory(input_dir: Union[str,Path], output_dir: Uni
         output_path = global_output_path / output_filename
         new_wb.save(output_path)
 
-        # print(f"Saved: {output_path}")
         total_output_files += 1
 
-
+    _LOGGER.info(f"✅ Processed {len(excel_files)} input Excel file(s) with a total of {total_output_files} output Excel file(s).")
     return None
 
 

@@ -199,13 +198,13 @@ def validate_excel_schema(
             invalid_files.append(file)
 
         except Exception as e:
-
+            _LOGGER.error(f"Error processing '{file}': {e}")
             invalid_files.append(file)
 
     valid_excel_number = len(excel_paths) - len(invalid_files)
-
+    _LOGGER.info(f"{valid_excel_number} out of {len(excel_paths)} excel files conform to the schema.")
     if invalid_files:
-
+        _LOGGER.warning(f"⚠️ {len(invalid_files)} excel files are invalid:")
         for in_file in invalid_files:
            print(f"  - {in_file.name}")
 

@@ -266,7 +265,7 @@ def vertical_merge_transform_excel(
         merged_df.columns = rename_columns
 
     merged_df.to_csv(csv_path, index=False, encoding='utf-8')
-
+    _LOGGER.info(f"✅ Merged {len(dataframes)} excel files into '{csv_filename}'.")
 
 
 def horizontal_merge_transform_excel(

@@ -344,9 +343,9 @@ def horizontal_merge_transform_excel(
 
     merged_df.to_csv(csv_path, index=False, encoding='utf-8')
 
-
+    _LOGGER.info(f"✅ Merged {len(excel_files)} Excel files into '{csv_filename}'.")
     if duplicate_columns:
-
+        _LOGGER.warning(f"⚠️ Duplicate columns: {duplicate_columns}")
 
 
 def info():
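The split and merge helpers now log one summary line instead of the old commented-out per-file prints. A hedged usage sketch (paths are placeholders; the function names and parameters are taken from the hunk headers above):

```python
from ml_tools.handle_excel import unmerge_and_split_excel, unmerge_and_split_from_directory

# One workbook: logs "✅ Processed file: ... into N output file(s)." on completion.
unmerge_and_split_excel("data/report.xlsx")

# A whole directory: logs a single summary covering all input and output files.
unmerge_and_split_from_directory(input_dir="data/raw", output_dir="data/split")
```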
ml_tools/logger.py
CHANGED

@@ -6,6 +6,9 @@ from openpyxl.styles import Font, PatternFill
 import traceback
 import json
 from .utilities import sanitize_filename, _script_info, make_fullpath
+import logging
+import sys
+
 
 
 __all__ = [

@@ -62,30 +65,30 @@ def custom_logger(
         base_path = save_path / f"{log_name}_{timestamp}"
 
         if isinstance(data, list):
-            _log_list_to_txt(data, base_path
+            _log_list_to_txt(data, base_path.with_suffix(".txt"))
 
         elif isinstance(data, dict):
             if all(isinstance(v, list) for v in data.values()):
-                _log_dict_to_csv(data, base_path
+                _log_dict_to_csv(data, base_path.with_suffix(".csv"))
             else:
-                _log_dict_to_json(data, base_path
+                _log_dict_to_json(data, base_path.with_suffix(".json"))
 
         elif isinstance(data, pd.DataFrame):
-            _log_dataframe_to_xlsx(data, base_path
+            _log_dataframe_to_xlsx(data, base_path.with_suffix(".xlsx"))
 
         elif isinstance(data, str):
-            _log_string_to_log(data, base_path
+            _log_string_to_log(data, base_path.with_suffix(".log"))
 
         elif isinstance(data, BaseException):
-            _log_exception_to_log(data, base_path
+            _log_exception_to_log(data, base_path.with_suffix(".log"))
 
         else:
             raise ValueError("Unsupported data type. Must be list, dict, DataFrame, str, or BaseException.")
 
-
+        _LOGGER.info(f"Log saved to: '{base_path}'")
 
     except Exception as e:
-
+        _LOGGER.error(f"Log not saved: {e}")
 
 
 def _log_list_to_txt(data: List[Any], path: Path) -> None:

@@ -154,3 +157,37 @@ def _log_dict_to_json(data: Dict[Any, Any], path: Path) -> None:
 
 def info():
     _script_info(__all__)
+
+
+def _get_logger(name: str = "ml_tools", level: int = logging.INFO):
+    """
+    Initializes and returns a configured logger instance.
+
+    - `logger.info()`
+    - `logger.warning()`
+    - `logger.error()` the program can potentially recover.
+    - `logger.critical()` the program is going to crash.
+    """
+    logger = logging.getLogger(name)
+    logger.setLevel(level)
+
+    # Prevents adding handlers multiple times if the function is called again
+    if not logger.handlers:
+        handler = logging.StreamHandler(sys.stdout)
+
+        # Define the format string and the date format separately
+        log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+        date_format = '%Y-%m-%d %H:%M' # Format: Year-Month-Day Hour:Minute
+
+        # Pass both the format and the date format to the Formatter
+        formatter = logging.Formatter(log_format, datefmt=date_format)
+
+        handler.setFormatter(formatter)
+        logger.addHandler(handler)
+
+    logger.propagate = False
+
+    return logger
+
+# Create a single logger instance to be imported by other modules
+_LOGGER = _get_logger()
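With `_get_logger` in place, every ml_tools module shares one idempotently configured stream logger, and `custom_logger` now derives the file suffix from the payload type (list → .txt, dict of lists → .csv, other dict → .json, DataFrame → .xlsx, str or exception → .log) via `Path.with_suffix`. A short sketch of what the configuration above emits; the timestamp is illustrative, while the format string and `%Y-%m-%d %H:%M` date format are the ones added in this diff:

```python
from ml_tools.logger import _LOGGER

_LOGGER.info("Training starting...")
# stdout: 2025-06-01 12:00 - ml_tools - INFO - Training starting...
_LOGGER.warning("⚠️ Duplicate columns: ['id']")
# stdout: 2025-06-01 12:00 - ml_tools - WARNING - ⚠️ Duplicate columns: ['id']
```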
ml_tools/utilities.py
CHANGED

@@ -221,7 +221,8 @@ def yield_dataframes_from_dir(datasets_dir: Union[str,Path]):
     """
     datasets_path = make_fullpath(datasets_dir)
     for df_name, df_path in list_csv_paths(datasets_path).items():
-        df
+        df: pd.DataFrame
+        df, _ = load_dataframe(df_path, kind="pandas") # type: ignore
         yield df, df_name
 
 

@@ -596,6 +597,22 @@ def distribute_datasets_by_target(
         yield target, subset
 
 
+class LogKeys:
+    """
+    Used for ML scripts only
+
+    Centralized keys for logging and history.
+    """
+    # --- Epoch Level ---
+    TRAIN_LOSS = 'train_loss'
+    VAL_LOSS = 'val_loss'
+
+    # --- Batch Level ---
+    BATCH_LOSS = 'loss'
+    BATCH_INDEX = 'batch'
+    BATCH_SIZE = 'size'
+
+
 def _script_info(all_data: list[str]):
     """
     List available names.
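`LogKeys` gives the new ML_* scripts one shared vocabulary for history and batch logs instead of scattered string literals. A hedged sketch of the intended shape (the dictionaries below are assumptions for illustration; only the class and its constants appear in the diff):

```python
from ml_tools.utilities import LogKeys

# Assumed epoch-level history, keyed by the shared constants
history = {LogKeys.TRAIN_LOSS: [0.92, 0.71], LogKeys.VAL_LOSS: [0.95, 0.78]}

# Assumed batch-level record
batch_log = {LogKeys.BATCH_LOSS: 0.69, LogKeys.BATCH_INDEX: 3, LogKeys.BATCH_SIZE: 32}

print(history[LogKeys.VAL_LOSS][-1])  # 0.78
```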
dragon_ml_toolbox-2.3.0.dist-info/RECORD
DELETED

@@ -1,21 +0,0 @@
-dragon_ml_toolbox-2.3.0.dist-info/licenses/LICENSE,sha256=2uUFNy7D0TLgHim1K5s3DIJ4q_KvxEXVilnU20cWliY,1066
-dragon_ml_toolbox-2.3.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=6cfpIeQ6D4Mcs10nkogQrkVyq1T7i2qXjjNHFoUMOyE,1892
-ml_tools/ETL_engineering.py,sha256=ns8HsLWZhByurvjtUUW10p7If1h1O5-btUfCRXxzkME,31568
-ml_tools/MICE_imputation.py,sha256=1fovHycZMdZ6OgVh_bk8-r3wGi4rqf6rS10LOEWYaQo,11177
-ml_tools/PSO_optimization.py,sha256=gi56mF-q6BApYwhAd9jix0xiYz595WTPcUh7afZsRJ4,25378
-ml_tools/VIF_factor.py,sha256=lpM3Z2X_iZfXUWbCbURoeI0Tb196lU0bAsRo7q6AzBM,10235
-ml_tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-ml_tools/_particle_swarm_optimization.py,sha256=b_eNNkA89Y40hj76KauivT8KLScH1B9wF2IXptOqkOw,22220
-ml_tools/data_exploration.py,sha256=Fzbz_DKZ7F2e3-JbahLqKr3aP6lt9aCK9rNOHvR7nlA,23665
-ml_tools/datasetmaster.py,sha256=EFUEX-tqq94Ak1rXXYR-XaX85olrxvF2EuytdzUK7y0,29131
-ml_tools/ensemble_learning.py,sha256=q9jbu7SupvXz61sURFQ9V2-7gUsLbA3cSgyb2MQFyyc,37351
-ml_tools/handle_excel.py,sha256=Uasx-DX7RNVQSzGHVJhX7UQ9RgBbX5H1ud1Hw_y8Kp4,12944
-ml_tools/logger.py,sha256=_k7WJdpFJj3IsjOgvjLJgUFZyF8RK3Jlgp5tAu_dLQU,4767
-ml_tools/pytorch_models.py,sha256=bpWZsrSwCvHJQkR6UfoPpElsMv9AvmiNErNHC8NYB_I,10132
-ml_tools/trainer.py,sha256=WAZ4EdrZuTOAnGXRWV3XcLNce4s7EKGf2-qchLC08Ik,15702
-ml_tools/utilities.py,sha256=T6AnNEQjUDnMAMSIJ8yZqToAVESIlEKK0bGBEm3sAUU,20670
-ml_tools/vision_helpers.py,sha256=idQ-Ugp1IdsvwXiYyhYa9G3rTRTm37YRpkQDLEpANHM,7701
-dragon_ml_toolbox-2.3.0.dist-info/METADATA,sha256=4wivV_JKPd83xNzf6xzSfCwxiZgvYL5uW4yE6Da8tnU,2974
-dragon_ml_toolbox-2.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-dragon_ml_toolbox-2.3.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
-dragon_ml_toolbox-2.3.0.dist-info/RECORD,,
ml_tools/trainer.py
DELETED

@@ -1,346 +0,0 @@
-import time
-import numpy
-from typing import Literal
-from torch.utils.data import DataLoader, Dataset
-import matplotlib.pyplot as plt
-import torch
-from torch import nn
-from sklearn.metrics import mean_squared_error, classification_report, ConfusionMatrixDisplay, roc_curve, roc_auc_score, r2_score, median_absolute_error
-from .utilities import _script_info
-
-
-__all__ = [
-    "MyTrainer"
-]
-
-
-class MyTrainer():
-    def __init__(self, model, train_dataset: Dataset, test_dataset: Dataset, kind: Literal["regression", "classification"],
-                 criterion=None , shuffle: bool=True, batch_size: float=3, device: Literal["cpu", "cuda", "mps"]='cpu', learn_rate: float=0.001, dataloader_workers: int=2):
-        """
-        Automates the training process of a PyTorch Model using Adam optimization by default (`self.optimizer`).
-
-        `kind`: Will be used to compute and display metrics after training is complete.
-
-        `shuffle`: Whether to shuffle dataset batches at every epoch. Default is True.
-
-        `criterion`: Loss function. If 'None', defaults to `nn.NLLLoss` for classification or `nn.MSELoss` for regression.
-
-        `batch_size` Represents the fraction of the original dataset size to be used per batch. If an integer is passed, use that many samples, instead. Default is 3 samples at a time.
-
-        `learn_rate` Model learning rate. Default is 0.001.
-
-        `dataloader_workers` Subprocesses to use for data loading. Default is 2.
-        """
-        # Validate kind
-        if kind not in ["regression", "classification"]:
-            raise TypeError("Kind must be 'regression' or 'classification'.")
-        # Validate batch size
-        batch_error = "Batch must a float in range [0.01, 1) or an integer."
-        if isinstance(batch_size, (float, int)):
-            if (1.00 > batch_size >= 0.01):
-                train_batch = int(len(train_dataset) * batch_size)
-                test_batch = int(len(test_dataset) * batch_size)
-            elif batch_size > len(train_dataset) or batch_size > len(test_dataset):
-                raise ValueError(batch_error + " Size is greater than dataset size.")
-            elif batch_size >= 1:
-                train_batch = int(batch_size)
-                test_batch = int(batch_size)
-            else:
-                raise ValueError(batch_error)
-        else:
-            raise TypeError(batch_error)
-        # Validate device
-        if device == "cuda":
-            if not torch.cuda.is_available():
-                print("CUDA not available, switching to CPU.")
-                device = "cpu"
-        elif device == "mps":
-            if not torch.backends.mps.is_available():
-                print("MPS not available, switching to CPU.")
-                device = "cpu"
-        # Validate criterion
-        if criterion is None:
-            if kind == "regression":
-                self.criterion = nn.MSELoss()
-            else:
-                self.criterion = nn.NLLLoss()
-        else:
-            self.criterion = criterion
-        # Validate dataloader workers
-        if not isinstance(dataloader_workers, int):
-            raise TypeError("Dataloader workers must be an integer value.")
-
-        # Check last layer in the model, implementation pending
-        # last_layer_name, last_layer = next(reversed(model._modules.items()))
-        # if isinstance(last_layer, nn.Linear):
-        #     pass
-
-        self.train_loader = DataLoader(dataset=train_dataset, batch_size=train_batch, shuffle=shuffle, num_workers=dataloader_workers, pin_memory=True if device=="cuda" else False)
-        self.test_loader = DataLoader(dataset=test_dataset, batch_size=test_batch, shuffle=shuffle, num_workers=dataloader_workers, pin_memory=True if device=="cuda" else False)
-        self.kind = kind
-        self.device = torch.device(device)
-        self.model = model.to(self.device)
-        self.optimizer = torch.optim.Adam(params=self.model.parameters(), lr=learn_rate)
-
-
-    def auto_train(self, epochs: int=200, patience: int=3, cmap: Literal["viridis", "Blues", "Greens", "Reds", "plasma", "coolwarm"]="Blues",
-                   roc: bool=False, **model_params):
-        """
-        Start training-validation process of the model.
-
-        `patience` is the number of consecutive times the Validation Loss is allowed to increase before early-stopping the training process.
-
-        `cmap` Color map to use for the confusion matrix.
-
-        `model_params` Keywords parameters specific to the model, if any.
-
-        `roc` Whether to display the Receiver Operating Characteristic (ROC) Curve, for binary classification only.
-        """
-        metric_name = "accuracy" if self.kind == "classification" else "RMSE"
-        previous_val_loss = None
-        epoch_tracker = 0
-        warnings = 0
-        feedback = None
-        val_losses = list()
-        train_losses = list()
-
-        # Validate inputs
-        if isinstance(epochs, int):
-            if epochs < 1:
-                print("Invalid number of epochs")
-                return None
-        else:
-            print("Invalid number of epochs")
-            return None
-
-        if isinstance(patience, int):
-            if patience < 0:
-                print("Invalid value for patience")
-                return None
-        else:
-            print("Invalid value for patience")
-            return None
-
-        if cmap not in ["viridis", "Blues", "Greens", "Reds", "plasma", "coolwarm"]:
-            print("Invalid cmap code, 'coolwarm' selected by default")
-            cmap = "coolwarm"
-
-        # Time training
-        start_time = time.time()
-
-        for epoch in range(1, epochs+1):
-            # Train model
-            self.model.train()
-            current_train_loss = 0
-            # Keep track of predictions and true labels on the last epoch to use later on scikit-learn
-            predictions_list = list()
-            true_labels_list = list()
-            probabilities_list = list()
-
-            for features, target in self.train_loader:
-                # features, targets to device
-                features = features.to(self.device)
-                target = target.to(self.device)
-                self.optimizer.zero_grad()
-                output = self.model(features, **model_params)
-                # check shapes
-                # print(features.shape, target.shape, output.shape)
-                # For Binary Cross Entropy
-                if isinstance(self.criterion, (nn.BCELoss, nn.BCEWithLogitsLoss)):
-                    target = target.to(torch.float32)
-                elif isinstance(self.criterion, (nn.MSELoss)):
-                    output = output.view_as(target)
-                train_loss = self.criterion(output, target)
-                # Cumulative loss for current epoch on all batches
-                current_train_loss += train_loss.item()
-                # Backpropagation
-                train_loss.backward()
-                self.optimizer.step()
-
-            # Average Train Loss per sample
-            current_train_loss /= len(self.train_loader.dataset)
-            train_losses.append(current_train_loss)
-
-            # Evaluate
-            self.model.eval()
-            current_val_loss = 0
-            correct = 0
-            with torch.no_grad():
-                for features, target in self.test_loader:
-                    # features, targets to device
-                    features = features.to(self.device)
-                    target = target.to(self.device)
-                    output = self.model(features, **model_params)
-                    # Save true labels for current batch (in case random shuffle was used)
-                    true_labels_list.append(target.view(-1,1).cpu().numpy())
-                    # For Binary Cross Entropy
-                    if isinstance(self.criterion, (nn.BCELoss, nn.BCEWithLogitsLoss)):
-                        target = target.to(torch.float32)
-                    elif isinstance(self.criterion, (nn.MSELoss)):
-                        output = output.view_as(target)
-                    current_val_loss += self.criterion(output, target).item()
-                    # Save predictions of current batch, get accuracy
-                    if self.kind == "classification":
-                        predictions_list.append(output.argmax(dim=1).view(-1,1).cpu().numpy())
-                        correct += output.argmax(dim=1).eq(target).sum().item()
-                        if roc:
-                            probabilities_local = nn.functional.softmax(output, dim=1)
-                            probabilities_list.append(probabilities_local.cpu().numpy())
-                    else: # Regression
-                        predictions_list.append(output.view(-1,1).cpu().numpy())
-
-            # Average Validation Loss per sample
-            current_val_loss /= len(self.test_loader.dataset)
-            val_losses.append(current_val_loss)
-
-            # Concatenate all predictions and true labels
-            predictions = numpy.concatenate(predictions_list, axis=0)
-            true_labels = numpy.concatenate(true_labels_list, axis=0)
-            if roc:
-                probabilities = numpy.concatenate(probabilities_list, axis=0)
-
-            # Accuracy / RMSE
-            if self.kind == "classification":
-                accuracy = correct / len(self.test_loader.dataset)
-                accuracy = str(round(100*accuracy, ndigits=1)) + "%"
-            else: # Regression
-                accuracy = numpy.sqrt(mean_squared_error(y_true=true_labels, y_pred=predictions))
-                accuracy = str(round(accuracy, ndigits=4))
-
-            # Print details
-            details_format = f'epoch {epoch:2}: training loss: {current_train_loss:6.4f} validation loss: {current_val_loss:6.4f} {metric_name}: {accuracy}'
-            if (epoch % max(1, int(0.05*epochs)) == 0) or epoch in [1, 3, 5]:
-                print(details_format)
-
-            # Compare validation loss per epoch
-            # First run
-            if previous_val_loss is None:
-                previous_val_loss = current_val_loss
-            # If validation loss is increasing or the same (not improving) use patience
-            elif current_val_loss >= previous_val_loss:
-                if epoch == epoch_tracker + 1:
-                    warnings += 1
-                else:
-                    warnings = 1
-                epoch_tracker = epoch
-            # If validation loss decreased
-            else:
-                warnings = 0
-
-            # If patience is exhausted
-            if warnings == patience:
-                feedback = f"👁️ Validation Loss has increased {patience} consecutive times."
-                break
-
-            # Training must continue for another epoch
-            previous_val_loss = current_val_loss
-
-        # if all epochs have been completed
-        else:
-            feedback = "Training has been completed without any early-stopping criteria."
-
-        # Print feedback message
-        print('\n', details_format)
-        print(feedback, f"\n")
-
-        # Show elapsed time
-        elapsed_time = time.time() - start_time
-        minutes, seconds = divmod(elapsed_time, 60)
-        print(f"Elapsed time: {minutes:.0f} minutes {seconds:2.0f} seconds {epoch} epochs")
-
-        # Plot losses
-        fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(10,4), dpi=150, sharey=False)
-
-        ax1.plot(range(2, epoch+1), train_losses[1:])
-        ax1.set_title("Training Loss")
-        ax1.set_xlabel("Epochs")
-        ax1.set_ylabel("Average loss per sample")
-
-        ax2.plot(range(2, epoch+1), val_losses[1:])
-        ax2.set_title("Validation Loss")
-        ax2.set_xlabel("Epochs")
-        ax2.set_ylabel("Average loss per sample")
-
-        plt.tight_layout()
-        plt.show()
-
-        # Metrics
-        # Display metrics
-        if self.kind == "regression":
-            rmse = numpy.sqrt(mean_squared_error(y_true=true_labels, y_pred=predictions))
-            r2 = r2_score(y_true=true_labels, y_pred=predictions)
-            medae = median_absolute_error(y_true=true_labels, y_pred=predictions)
-            print(f"Root Mean Squared Error (RMSE): {rmse:6.4f} (range 0 to \u221E)")
-            print(f"Median Absolute Error (MedAE): {medae:6.4f} (range: 0 to \u221E)")
-            print(f"Coefficient of Determination (R2 Score): {r2:4.2f} (range: -\u221E to 1)\n")
-
-        elif self.kind == "classification":
-            print(classification_report(y_true=true_labels, y_pred=predictions))
-            ConfusionMatrixDisplay.from_predictions(y_true=true_labels, y_pred=predictions, cmap=cmap)
-
-            # ROC curve & Area under the curve
-            if roc:
-                false_positives, true_positives, thresholds = roc_curve(y_true=true_labels, y_score=probabilities[:,1])
-                area_under_curve = roc_auc_score(y_true=true_labels, y_score=probabilities[:,1])
-
-                plt.figure(figsize=(4,4))
-                plt.plot(false_positives, true_positives)
-                plt.title("Receiver Operating Characteristic (ROC) Curve")
-                plt.xlabel("False Positive Rate")
-                plt.ylabel("True Positive Rate")
-                plt.show()
-
-                print(f"Area under the curve score: {area_under_curve:4.2f}")
-        else:
-            print("Error encountered while retrieving 'model.kind' attribute.")
-
-
-    def rnn_forecast(self, sequence: torch.Tensor, steps: int):
-        """
-        Runs a sequential forecast for a RNN, where each new prediction is obtained by feeding the previous prediction.
-
-        The input tensor representing a sequence must be of shape `(sequence length, number of features)` with normalized values (if needed).
-
-        Args:
-            `sequence`: Last subsequence of the sequence.
-
-            `steps`: Number of future time steps to predict.
-
-        Returns: Numpy array of predictions.
-        """
-        self.model.eval()
-        with torch.no_grad():
-            # send sequence to device
-            sequence = sequence.to(self.device)
-            # Make a dummy list in memory
-            sequences = [torch.zeros_like(sequence, device=self.device, requires_grad=False) for _ in range(steps)]
-            sequences[0] = sequence
-            # Store predictions
-            predictions = list()
-            # Get predictions
-            for i in range(steps):
-                in_seq = sequences[i]
-                output = self.model(in_seq)
-                # Last timestamp
-                output = output[-1].view(1,-1)
-                # Save prediction
-                # Check if it is a single feature, get value
-                if output.shape[1] == 1:
-                    predictions.append(output.item())
-                # Else, return a list of lists
-                else:
-                    predictions.append(output.squeeze().cpu().tolist())
-                # Create next sequence
-                if i < steps-1:
-                    current_seq = sequences[i]
-                    new_seq = torch.concatenate([current_seq[1:], output], dim=0).to(self.device)
-                    sequences[i+1] = new_seq
-
-        # Cast to array and return
-        predictions = numpy.array(predictions)
-        return predictions
-
-
-def info():
-    _script_info(__all__)