dragon-ml-toolbox 5.1.0__py3-none-any.whl → 5.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of dragon-ml-toolbox was flagged as possibly problematic.
- {dragon_ml_toolbox-5.1.0.dist-info → dragon_ml_toolbox-5.2.0.dist-info}/METADATA +3 -2
- {dragon_ml_toolbox-5.1.0.dist-info → dragon_ml_toolbox-5.2.0.dist-info}/RECORD +8 -8
- ml_tools/ML_datasetmaster.py +1 -1
- ml_tools/ML_models.py +134 -0
- ml_tools/_pytorch_models.py +0 -239
- {dragon_ml_toolbox-5.1.0.dist-info → dragon_ml_toolbox-5.2.0.dist-info}/WHEEL +0 -0
- {dragon_ml_toolbox-5.1.0.dist-info → dragon_ml_toolbox-5.2.0.dist-info}/licenses/LICENSE +0 -0
- {dragon_ml_toolbox-5.1.0.dist-info → dragon_ml_toolbox-5.2.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +0 -0
- {dragon_ml_toolbox-5.1.0.dist-info → dragon_ml_toolbox-5.2.0.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 5.1.0
+Version: 5.2.0
 Summary: A collection of tools for data science and machine learning projects.
 Author-email: Karl Loza <luigiloza@gmail.com>
 License-Expression: MIT
@@ -141,10 +141,11 @@ pip install "dragon-ml-toolbox[pytorch]"
 ```bash
 custom_logger
 data_exploration
-datasetmaster
 ensemble_learning
 ensemble_inference
 ETL_engineering
+ML_datasetmaster
+ML_models
 ML_callbacks
 ML_evaluation
 ML_trainer
@@ -1,12 +1,13 @@
-dragon_ml_toolbox-5.1.0.dist-info/licenses/LICENSE,…
-dragon_ml_toolbox-5.1.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,…
+dragon_ml_toolbox-5.2.0.dist-info/licenses/LICENSE,sha256=2uUFNy7D0TLgHim1K5s3DIJ4q_KvxEXVilnU20cWliY,1066
+dragon_ml_toolbox-5.2.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=lY4_rJPnLnMu7YBQaY-_iz1JRDcLdQzNCyeLAF1glJY,1837
 ml_tools/ETL_engineering.py,sha256=4wwZXi9_U7xfCY70jGBaKniOeZ0m75ppxWpQBd_DmLc,39369
 ml_tools/GUI_tools.py,sha256=n4ZZ5kEjwK5rkOCFJE41HeLFfjhpJVLUSzk9Kd9Kr_0,45410
 ml_tools/MICE_imputation.py,sha256=b6ZTs8RedXFifOpuMCzr68xM16mCBVh1Ua6kcGfiVtg,11462
 ml_tools/ML_callbacks.py,sha256=0a-Rbr0Xp_B1FNopOKBBmuJ4MqazS5JgDiT7wx1dHvE,13161
-ml_tools/ML_datasetmaster.py,sha256=…
+ml_tools/ML_datasetmaster.py,sha256=6sOauDObiubqulLTU1nLQXWFdMdZ6lPiz5o9xUzZNEc,33327
 ml_tools/ML_evaluation.py,sha256=4dVqe6JF1Ukmk1sAcY8E5EG1oB1_oy2HXE5OT-pZwCs,10273
 ml_tools/ML_inference.py,sha256=Fh-X2UQn3AznWBjf-7iPSxwE-EzkGQm1VEIRUAkURmE,5336
+ml_tools/ML_models.py,sha256=vG4i_lmQ9Jz6twO2jmu6teHET8qluKO02odSwzYfvRI,5436
 ml_tools/ML_optimization.py,sha256=u3H-TYGycKDdog-njkMfiAxd8TBtmGeLLFplBPRmmxk,10057
 ml_tools/ML_trainer.py,sha256=dJjMfCEEM07Txy9KEH-2srZ3CZUa4lFWTJhpNWQ4Ndk,14974
 ml_tools/PSO_optimization.py,sha256=stH2Ux1sftQgX5EwLc85kHcoT4Rmz6zv7sH2yzf4Zrw,22710
@@ -15,7 +16,6 @@ ml_tools/SQL.py,sha256=9zzS6AFEJM9aj6nE31hDe8S9TqLonk-J1amwZoiHNbk,10468
 ml_tools/VIF_factor.py,sha256=2nUMupfUoogf8o6ghoFZk_OwWhFXU0R3C9Gj0HOlI14,10415
 ml_tools/__init__.py,sha256=q0y9faQ6e17XCQ7eUiCZ1FJ4Bg5EQqLjZ9f_l5REUUY,41
 ml_tools/_logger.py,sha256=TpgYguxO-CWYqqgLW0tqFjtwZ58PE_W2OCfWNGZr0n0,1175
-ml_tools/_pytorch_models.py,sha256=ewPPsTHgmRPzMMWwObZOdH1vxm2Ij2VWZP38NC6zSH4,10135
 ml_tools/_script_info.py,sha256=21r83LV3RubsNZ_RTEUON6RbDf7Mh4_udweNcvdF_Fk,212
 ml_tools/custom_logger.py,sha256=njM_0XPbQ1S-x5LeSQAaTo2if-XVOR_pQSGg4EDeiTU,4603
 ml_tools/data_exploration.py,sha256=qc_Oolxco2x9IhlYu5zPIuVBGiBw65HnypuGm8cQOOM,23677
@@ -26,7 +26,7 @@ ml_tools/keys.py,sha256=kK9UF-hek2VcPGFILCKl5geoN6flmMOu7IzhdEA6z5Y,1068
 ml_tools/optimization_tools.py,sha256=MuT4OG7_r1QqLUti-yYix7QeCpglezD0oe9BDCq0QXk,5086
 ml_tools/path_manager.py,sha256=Z8e7w3MPqQaN8xmTnKuXZS6CIW59BFwwqGhGc00sdp4,13692
 ml_tools/utilities.py,sha256=mz-M351DzxWxnYVcLX-7ZQ6c-RGoCV9g4VTS9Qif2Es,18348
-dragon_ml_toolbox-5.1.0.dist-info/METADATA,…
-dragon_ml_toolbox-5.1.0.dist-info/WHEEL,…
-dragon_ml_toolbox-5.1.0.dist-info/top_level.txt,…
-dragon_ml_toolbox-5.1.0.dist-info/RECORD,…
+dragon_ml_toolbox-5.2.0.dist-info/METADATA,sha256=wDf6l5JqtocGsJ7mOG8Rbe8G7LdEx6C2SXeDw3aJY_4,6638
+dragon_ml_toolbox-5.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+dragon_ml_toolbox-5.2.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
+dragon_ml_toolbox-5.2.0.dist-info/RECORD,,
ml_tools/ML_datasetmaster.py CHANGED
ml_tools/ML_models.py ADDED
@@ -0,0 +1,134 @@
+import torch
+from torch import nn
+from ._script_info import _script_info
+from typing import List
+
+
+__all__ = [
+    "MultilayerPerceptron",
+    "SequencePredictorLSTM"
+]
+
+
+class MultilayerPerceptron(nn.Module):
+    """
+    Creates a versatile Multilayer Perceptron (MLP) for regression or classification tasks.
+
+    This model generates raw output values (logits) suitable for use with loss
+    functions like `nn.CrossEntropyLoss` (for classification) or `nn.MSELoss`
+    (for regression).
+
+    Args:
+        in_features (int): The number of input features (e.g., columns in your data).
+        out_targets (int): The number of output targets. For regression, this is
+            typically 1. For classification, it's the number of classes.
+        hidden_layers (list[int]): A list where each integer represents the
+            number of neurons in a hidden layer. Defaults to [40, 80, 40].
+        drop_out (float): The dropout probability for neurons in each hidden
+            layer. Must be between 0.0 and 1.0. Defaults to 0.2.
+
+    ### Rules of thumb:
+    - Choose a number of hidden neurons between the size of the input layer and the size of the output layer.
+    - The number of hidden neurons should be 2/3 the size of the input layer, plus the size of the output layer.
+    - The number of hidden neurons should be less than twice the size of the input layer.
+    """
+    def __init__(self, in_features: int, out_targets: int,
+                 hidden_layers: List[int] = [40, 80, 40], drop_out: float = 0.2) -> None:
+        super().__init__()
+
+        # --- Validation ---
+        if not isinstance(in_features, int) or in_features < 1:
+            raise ValueError("in_features must be a positive integer.")
+        if not isinstance(out_targets, int) or out_targets < 1:
+            raise ValueError("out_targets must be a positive integer.")
+        if not isinstance(hidden_layers, list) or not all(isinstance(n, int) for n in hidden_layers):
+            raise TypeError("hidden_layers must be a list of integers.")
+        if not (0.0 <= drop_out < 1.0):
+            raise ValueError("drop_out must be a float between 0.0 and 1.0.")
+
+        # --- Build network layers ---
+        layers = []
+        current_features = in_features
+        for neurons in hidden_layers:
+            layers.extend([
+                nn.Linear(current_features, neurons),
+                nn.BatchNorm1d(neurons),
+                nn.ReLU(),
+                nn.Dropout(p=drop_out)
+            ])
+            current_features = neurons
+
+        # Add the final output layer
+        layers.append(nn.Linear(current_features, out_targets))
+
+        self._layers = nn.Sequential(*layers)
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """Defines the forward pass of the model."""
+        return self._layers(x)
+
+
+class SequencePredictorLSTM(nn.Module):
+    """
+    A simple LSTM-based network for sequence-to-sequence prediction tasks.
+
+    This model is designed for datasets where each input sequence maps to an
+    output sequence of the same length. It's suitable for forecasting problems
+    prepared by the `SequenceMaker` class.
+
+    The expected input shape is `(batch_size, sequence_length, features)`.
+
+    Args:
+        features (int): The number of features in the input sequence. Defaults to 1.
+        hidden_size (int): The number of features in the LSTM's hidden state.
+            Defaults to 100.
+        recurrent_layers (int): The number of recurrent LSTM layers. Defaults to 1.
+        dropout (float): The dropout probability for all but the last LSTM layer.
+            Defaults to 0.
+    """
+    def __init__(self, features: int = 1, hidden_size: int = 100,
+                 recurrent_layers: int = 1, dropout: float = 0):
+        super().__init__()
+
+        # --- Validation ---
+        if not isinstance(features, int) or features < 1:
+            raise ValueError("features must be a positive integer.")
+        if not isinstance(hidden_size, int) or hidden_size < 1:
+            raise ValueError("hidden_size must be a positive integer.")
+        if not isinstance(recurrent_layers, int) or recurrent_layers < 1:
+            raise ValueError("recurrent_layers must be a positive integer.")
+        if not (0.0 <= dropout < 1.0):
+            raise ValueError("dropout must be a float between 0.0 and 1.0.")
+
+        self.lstm = nn.LSTM(
+            input_size=features,
+            hidden_size=hidden_size,
+            num_layers=recurrent_layers,
+            dropout=dropout,
+            batch_first=True  # This is crucial for (batch, seq, feature) input
+        )
+        self.linear = nn.Linear(in_features=hidden_size, out_features=features)
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """
+        Defines the forward pass.
+
+        Args:
+            x (torch.Tensor): The input tensor with shape
+                (batch_size, sequence_length, features).
+
+        Returns:
+            torch.Tensor: The output tensor with shape
+                (batch_size, sequence_length, features).
+        """
+        # The LSTM returns the full output sequence and the final hidden/cell states
+        lstm_out, _ = self.lstm(x)
+
+        # Pass the LSTM's output sequence to the linear layer
+        predictions = self.linear(lstm_out)
+
+        return predictions
+
+
+def info():
+    _script_info(__all__)
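For orientation, here is a minimal usage sketch of the two new classes (illustrative only, not part of the diff; the tensor shapes and hyperparameter values are assumptions):

import torch
from torch import nn
from ml_tools.ML_models import MultilayerPerceptron, SequencePredictorLSTM

# MLP for a 3-class problem: the model emits raw logits, so it pairs with
# nn.CrossEntropyLoss (or nn.MSELoss with out_targets=1 for regression).
mlp = MultilayerPerceptron(in_features=10, out_targets=3)
x = torch.randn(32, 10)                      # (batch, features); BatchNorm1d needs batch > 1 in training mode
loss = nn.CrossEntropyLoss()(mlp(x), torch.randint(0, 3, (32,)))

# Sequence-to-sequence LSTM: batch_first input, same-length output.
lstm = SequencePredictorLSTM(features=1, hidden_size=100)
seq = torch.randn(32, 50, 1)                 # (batch, sequence_length, features)
pred = lstm(seq)                             # (32, 50, 1)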
ml_tools/_pytorch_models.py DELETED
@@ -1,239 +0,0 @@
-import torch
-from torch import nn
-from ._script_info import _script_info
-
-
-__all__ = [
-    "MyNeuralNetwork",
-    "MyLSTMNetwork"
-]
-
-
-class MyNeuralNetwork(nn.Module):
-    def __init__(self, in_features: int, out_targets: int, hidden_layers: list[int]=[40,80,40], drop_out: float=0.2) -> None:
-        """
-        Creates a basic Neural Network.
-
-        * For Regression the last layer is Linear.
-        * For Classification the last layer is Logarithmic Softmax.
-
-        `out_targets` Is the number of expected output classes for classification; or `1` for regression.
-
-        `hidden_layers` takes a list of integers. Each position represents a hidden layer and its number of neurons.
-
-        * One rule of thumb is to choose a number of hidden neurons between the size of the input layer and the size of the output layer.
-        * Another rule suggests that the number of hidden neurons should be 2/3 the size of the input layer, plus the size of the output layer.
-        * Another rule suggests that the number of hidden neurons should be less than twice the size of the input layer.
-
-        `drop_out` represents the probability of neurons to be set to '0' during the training process of each layer. Range [0.0, 1.0).
-        """
-        super().__init__()
-
-        # Validate inputs and outputs
-        if isinstance(in_features, int) and isinstance(out_targets, int):
-            if in_features < 1 or out_targets < 1:
-                raise ValueError("Inputs or Outputs must be an integer value.")
-        else:
-            raise TypeError("Inputs or Outputs must be an integer value.")
-
-        # Validate layers
-        if isinstance(hidden_layers, list):
-            for number in hidden_layers:
-                if not isinstance(number, int):
-                    raise TypeError("Number of neurons per hidden layer must be an integer value.")
-        else:
-            raise TypeError("hidden_layers must be a list of integer values.")
-
-        # Validate dropout
-        if isinstance(drop_out, float):
-            if 1.0 > drop_out >= 0.0:
-                pass
-            else:
-                raise TypeError("drop_out must be a float value greater than or equal to 0 and less than 1.")
-        elif drop_out == 0:
-            pass
-        else:
-            raise TypeError("drop_out must be a float value greater than or equal to 0 and less than 1.")
-
-        # Create layers
-        layers = list()
-        for neurons in hidden_layers:
-            layers.append(nn.Linear(in_features=in_features, out_features=neurons))
-            layers.append(nn.BatchNorm1d(num_features=neurons))
-            layers.append(nn.ReLU())
-            layers.append(nn.Dropout(p=drop_out))
-            in_features = neurons
-        # Append output layer
-        layers.append(nn.Linear(in_features=in_features, out_features=out_targets))
-
-        # Check for classification or regression output
-        if out_targets > 1:
-            # layers.append(nn.Sigmoid())
-            layers.append(nn.LogSoftmax(dim=1))
-
-        # Create a container for layers
-        self._layers = nn.Sequential(*layers)
-
-    # Override forward()
-    def forward(self, X: torch.Tensor) -> torch.Tensor:
-        X = self._layers(X)
-        return X
-
-
-class _MyConvolutionalNetwork(nn.Module):
-    def __init__(self, outputs: int, color_channels: int=3, img_size: int=256, drop_out: float=0.2):
-        """
-        - EDUCATIONAL PURPOSES ONLY, not optimized and requires lots of memory.
-
-        Create a basic Convolutional Neural Network with two convolution layers with a pooling layer after each convolution.
-
-        Args:
-            `outputs`: Number of output classes (1 for regression).
-
-            `color_channels`: Color channels. Default is 3 (RGB).
-
-            `img_size`: Width and Height of image samples, must be square images. Default is 200.
-
-            `drop_out`: Neuron drop out probability. Default is 20%.
-        """
-        super().__init__()
-
-        # Validate outputs number
-        integer_error = " must be an integer greater than 0."
-        if isinstance(outputs, int):
-            if outputs < 1:
-                raise ValueError("Outputs" + integer_error)
-        else:
-            raise TypeError("Outputs" + integer_error)
-        # Validate color channels
-        if isinstance(color_channels, int):
-            if color_channels < 1:
-                raise ValueError("Color Channels" + integer_error)
-        else:
-            raise TypeError("Color Channels" + integer_error)
-        # Validate image size
-        if isinstance(img_size, int):
-            if img_size < 1:
-                raise ValueError("Image size" + integer_error)
-        else:
-            raise TypeError("Image size" + integer_error)
-        # Validate drop out
-        if isinstance(drop_out, float):
-            if 1.0 > drop_out >= 0.0:
-                pass
-            else:
-                raise TypeError("Drop out must be a float value greater than or equal to 0 and less than 1.")
-        elif drop_out == 0:
-            pass
-        else:
-            raise TypeError("Drop out must be a float value greater than or equal to 0 and less than 1.")
-
-        # 2 convolutions, 2 pooling layers
-        self._cnn_layers = nn.Sequential(
-            nn.Conv2d(in_channels=color_channels, out_channels=(color_channels * 2), kernel_size=5, stride=1, padding=1),
-            nn.MaxPool2d(kernel_size=4, stride=(4,4)),
-            nn.Conv2d(in_channels=(color_channels * 2), out_channels=(color_channels * 3), kernel_size=3, stride=1, padding=0),
-            nn.AvgPool2d(kernel_size=2, stride=(2,2))
-        )
-        # Calculate output features
-        flat_features = int(int((int((img_size + 2 - (5-1))//4) - (3-1))//2)**2) * (color_channels * 3)
-
-        # Make a standard ANN
-        ann = MyNeuralNetwork(in_features=flat_features, hidden_layers=[int(flat_features*0.5), int(flat_features*0.2), int(flat_features*0.005)],
-                              out_targets=outputs, drop_out=drop_out)
-        self._ann_layers = ann._layers
-
-        # Join CNN and ANN
-        self._structure = nn.Sequential(self._cnn_layers, nn.Flatten(), self._ann_layers)
-
-        # Send to CUDA if available
-        # if torch.cuda.is_available():
-        #     self.to('cuda')
-
-    # Override forward()
-    def forward(self, X: torch.Tensor) -> torch.Tensor:
-        X = self._structure(X)
-        return X
-
-
-class MyLSTMNetwork(nn.Module):
-    def __init__(self, features: int=1, hidden_size: int=100, recurrent_layers: int=1, dropout: float=0, reset_memory: bool=False, **kwargs):
-        """
-        Create a simple Recurrent Neural Network to predict 1 time step into the future of sequential data.
-
-        The sequence should be a 2D tensor with shape (sequence_length, number_of_features).
-
-        Args:
-            * `features`: Number of features representing the sequence. Defaults to 1.
-            * `hidden_size`: Hidden size of the LSTM model. Defaults to 100.
-            * `recurrent_layers`: Number of recurrent layers to use. Defaults to 1.
-            * `dropout`: Probability of dropping out neurons in each recurrent layer, except the last layer. Defaults to 0.
-            * `reset_memory`: Reset the initial hidden state and cell state for the recurrent layers at every epoch. Defaults to False.
-            * `kwargs`: Create custom attributes for the model.
-
-        Custom forward() parameters:
-            * `batch_size=1` (int): batch size for the LSTM net.
-            * `return_last_timestamp=False` (bool): Return only the value at `output[-1]`
-        """
-        # validate input size
-        if not isinstance(features, int):
-            raise TypeError("Input size must be an integer value.")
-        # validate hidden size
-        if not isinstance(hidden_size, int):
-            raise TypeError("Hidden size must be an integer value.")
-        # validate layers
-        if not isinstance(recurrent_layers, int):
-            raise TypeError("Number of recurrent layers must be an integer value.")
-        # validate dropout
-        if isinstance(dropout, (float, int)):
-            if 0 <= dropout < 1:
-                pass
-            else:
-                raise ValueError("Dropout must be a float in range [0.0, 1.0)")
-        else:
-            raise TypeError("Dropout must be a float in range [0.0, 1.0)")
-
-        super().__init__()
-
-        # Initialize memory
-        self._reset = reset_memory
-        self._memory = None
-
-        # hidden size and features shape
-        self._hidden = hidden_size
-        self._features = features
-
-        # RNN
-        self._lstm = nn.LSTM(input_size=features, hidden_size=self._hidden, num_layers=recurrent_layers, dropout=dropout)
-
-        # Fully connected layer
-        self._ann = nn.Linear(in_features=self._hidden, out_features=features)
-
-        # Parse extra parameters
-        for key, value in kwargs.items():
-            setattr(self, key, value)
-
-
-    def forward(self, seq: torch.Tensor, batch_size: int=1, return_last_timestamp: bool=False) -> torch.Tensor:
-        # reset memory
-        if self._reset:
-            self._memory = None
-        # reshape sequence to feed RNN
-        seq = seq.view(-1, batch_size, self._features)
-        # Pass sequence through RNN
-        seq, self._memory = self._lstm(seq, self._memory)
-        # Detach hidden state and cell state to prevent backpropagation error
-        self._memory = tuple(m.detach() for m in self._memory)
-        # Reshape outputs
-        seq = seq.view(-1, self._hidden)
-        # Pass sequence through fully connected layer
-        output = self._ann(seq)
-        # Return prediction of 1 time step in the future
-        if return_last_timestamp:
-            return output[-1].view(1,-1) #last item as a tensor.
-        else:
-            return output
-
-
-def info():
-    _script_info(__all__)
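The practical difference between the deleted classes and their replacements is worth spelling out. The old MyNeuralNetwork appended nn.LogSoftmax(dim=1) whenever out_targets > 1, which pairs with nn.NLLLoss; the new MultilayerPerceptron always returns raw logits, which pair with nn.CrossEntropyLoss. Likewise, the old MyLSTMNetwork carried hidden/cell state between calls and reshaped inputs to (sequence_length, batch, features), while SequencePredictorLSTM is stateless and expects batch_first input. A small sketch of the loss-function migration (illustrative tensors, not from the package):

import torch
from torch import nn

logits = torch.randn(8, 5)                   # raw scores: 8 samples, 5 classes
targets = torch.randint(0, 5, (8,))

# 5.1.0 style: the model output already went through LogSoftmax -> use NLLLoss.
loss_old = nn.NLLLoss()(nn.LogSoftmax(dim=1)(logits), targets)

# 5.2.0 style: the model output is raw logits -> use CrossEntropyLoss, which
# applies log-softmax internally; the two losses agree numerically.
loss_new = nn.CrossEntropyLoss()(logits, targets)
assert torch.allclose(loss_old, loss_new)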