PyPI - dragon-ml-toolbox - Versions diffs - 5.0.0__tar.gz → 5.1.0__tar.gz - Mend

dragon-ml-toolbox 5.0.0 (tar.gz) → 5.1.0 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of dragon-ml-toolbox might be problematic. Click here for more details.

Files changed (37)

{dragon_ml_toolbox-5.0.0/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-5.1.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 5.0.0
+Version: 5.1.0
 Summary: A collection of tools for data science and machine learning projects.
 Author-email: Karl Loza <luigiloza@gmail.com>
 License-Expression: MIT
@@ -268,5 +268,5 @@ After installation, import modules like this:
 ```python
 from ml_tools.utilities import serialize_object, deserialize_object
-from ml_tools.custom_logger import custom_logger
+from ml_tools import custom_logger
 ```

{dragon_ml_toolbox-5.0.0 → dragon_ml_toolbox-5.1.0}/README.md RENAMED Viewed

@@ -187,5 +187,5 @@ After installation, import modules like this:
 ```python
 from ml_tools.utilities import serialize_object, deserialize_object
-from ml_tools.custom_logger import custom_logger
+from ml_tools import custom_logger
 ```

{dragon_ml_toolbox-5.0.0 → dragon_ml_toolbox-5.1.0/dragon_ml_toolbox.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 5.0.0
+Version: 5.1.0
 Summary: A collection of tools for data science and machine learning projects.
 Author-email: Karl Loza <luigiloza@gmail.com>
 License-Expression: MIT
@@ -268,5 +268,5 @@ After installation, import modules like this:
 ```python
 from ml_tools.utilities import serialize_object, deserialize_object
-from ml_tools.custom_logger import custom_logger
+from ml_tools import custom_logger
 ```

{dragon_ml_toolbox-5.0.0 → dragon_ml_toolbox-5.1.0}/ml_tools/ML_datasetmaster.py RENAMED Viewed

@@ -21,6 +21,7 @@ from ._script_info import _script_info
 # --- public-facing API ---
 __all__ = [
     "DatasetMaker",
+    "SimpleDatasetMaker",
     "VisionDatasetMaker",
     "SequenceMaker",
     "ResizeAspectFill",
@@ -328,7 +329,7 @@ class DatasetMaker(_BaseMaker):
         return self.scaler.inverse_transform(data_np)
-    def get_datasets(self) -> Tuple[_PytorchDataset, _PytorchDataset]:
+    def get_datasets(self) -> Tuple[Dataset, Dataset]:
         """Primary method to get the final PyTorch Datasets."""
         if not self._is_split:
             raise RuntimeError("Data has not been split yet. Call .split_data() or .process() first.")
@@ -370,6 +371,95 @@ class DatasetMaker(_BaseMaker):
         return pandas.DataFrame(full_tensor.numpy(), columns=new_columns, index=cat_df.index)
+# Streamlined DatasetMaker version
+class SimpleDatasetMaker:
+    """
+    A simplified dataset maker for pre-processed, numerical pandas DataFrames.
+    This class takes a DataFrame, automatically splits it into training and
+    testing sets, and converts them into PyTorch Datasets. It assumes the
+    target variable is the last column.
+    Args:
+        pandas_df (pandas.DataFrame): The pre-processed input DataFrame with numerical data.
+        test_size (float): The proportion of the dataset to allocate to the
+                           test split.
+        random_state (int): The seed for the random number generator for
+                            reproducibility.
+        id (str | None): An optional object identifier.
+    """
+    def __init__(self, pandas_df: pandas.DataFrame, test_size: float = 0.2, random_state: int = 42, id: Optional[str]=None):
+        """
+        Attributes:
+            `train_dataset` -> PyTorch Dataset
+            `test_dataset`  -> PyTorch Dataset
+            `feature_names` -> list[str]
+            `target_name`   -> str
+            `id` -> str | None
+        """
+        if not isinstance(pandas_df, pandas.DataFrame):
+            raise TypeError("Input must be a pandas.DataFrame.")
+        #set id
+        self._id = id
+        # 1. Identify features and target
+        features = pandas_df.iloc[:, :-1]
+        target = pandas_df.iloc[:, -1]
+        self._feature_names = features.columns.tolist()
+        self._target_name = target.name
+        # 2. Split the data
+        X_train, X_test, y_train, y_test = train_test_split(
+            features, target, test_size=test_size, random_state=random_state
+        )
+        self._X_train_shape = X_train.shape
+        self._X_test_shape = X_test.shape
+        self._y_train_shape = y_train.shape
+        self._y_test_shape = y_test.shape
+        # 3. Convert to PyTorch Datasets
+        self._train_ds = _PytorchDataset(X_train.values, y_train.values)
+        self._test_ds = _PytorchDataset(X_test.values, y_test.values)
+    @property
+    def train_dataset(self) -> Dataset:
+        """Returns the training PyTorch dataset."""
+        return self._train_ds
+    @property
+    def test_dataset(self) -> Dataset:
+        """Returns the testing PyTorch dataset."""
+        return self._test_ds
+    @property
+    def feature_names(self) -> list[str]:
+        """Returns the list of feature column names."""
+        return self._feature_names
+    @property
+    def target_name(self) -> str:
+        """Returns the name of the target column."""
+        return str(self._target_name)
+    @property
+    def id(self) -> Optional[str]:
+        """Returns the object identifier if any."""
+        return self._id
+    def dataframes_info(self) -> None:
+        """Prints the shape information of the split pandas DataFrames."""
+        print("--- Original DataFrame Shapes After Split ---")
+        print(f"  X_train shape: {self._X_train_shape}")
+        print(f"  y_train shape: {self._y_train_shape}\n")
+        print(f"  X_test shape:  {self._X_test_shape}")
+        print(f"  y_test shape:  {self._y_test_shape}")
+        print("-------------------------------------------")
 # --- VisionDatasetMaker ---
 class VisionDatasetMaker(_BaseMaker):
     """

{dragon_ml_toolbox-5.0.0 → dragon_ml_toolbox-5.1.0}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "dragon-ml-toolbox"
-version = "5.0.0"
+version = "5.1.0"
 description = "A collection of tools for data science and machine learning projects."
 authors = [
     { name = "Karl Loza", email = "luigiloza@gmail.com" }