PyPI - dragon-ml-toolbox - Versions diffs - 13.3.2__py3-none-any.whl → 13.4.0__py3-none-any.whl - Mend

dragon-ml-toolbox 13.3.2__py3-none-any.whl → 13.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of dragon-ml-toolbox might be problematic. Click here for more details.

Files changed (7) hide show

{dragon_ml_toolbox-13.3.2.dist-info → dragon_ml_toolbox-13.4.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 13.3.2
+Version: 13.4.0
 Summary: A collection of tools for data science and machine learning projects.
 Author-email: "Karl L. Loza Vidaurre" <luigiloza@gmail.com>
 License-Expression: MIT

{dragon_ml_toolbox-13.3.2.dist-info → dragon_ml_toolbox-13.4.0.dist-info}/RECORD RENAMED Viewed

@@ -1,11 +1,11 @@
-dragon_ml_toolbox-13.3.2.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
-dragon_ml_toolbox-13.3.2.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=iy2r_R7wjzsCbz_Q_jMsp_jfZ6oP8XW9QhwzRBH0mGY,1904
+dragon_ml_toolbox-13.4.0.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
+dragon_ml_toolbox-13.4.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=iy2r_R7wjzsCbz_Q_jMsp_jfZ6oP8XW9QhwzRBH0mGY,1904
 ml_tools/ETL_cleaning.py,sha256=2VBRllV8F-ZiPylPp8Az2gwn5ztgazN0BH5OKnRUhV0,20402
 ml_tools/ETL_engineering.py,sha256=KfYqgsxupAx6e_TxwO1LZXeu5mFkIhVXJrNjP3CzIZc,54927
 ml_tools/GUI_tools.py,sha256=Va6ig-dHULPVRwQYYtH3fvY5XPIoqRcJpRW8oXC55Hw,45413
 ml_tools/MICE_imputation.py,sha256=X273Qlgoqqg7KTmoKd75YDyAPB0UIbTzGP3xsCmRh3E,11717
 ml_tools/ML_callbacks.py,sha256=elD2Yr030sv_6gX_m9GVd6HTyrbmt34nFS8lrgS4HtM,15808
-ml_tools/ML_datasetmaster.py,sha256=7QJnOM6GWFklKt2fiukITM3DK49i3ThK8wazb5szwpE,34396
+ml_tools/ML_datasetmaster.py,sha256=6caWbq6eu1RE9V51gmceD71PtMctJRjFuLvkkK5ChiY,36271
 ml_tools/ML_evaluation.py,sha256=3u5dOhS77gn3kAshKr2GwSa5xZBF0YM77ZkFevqNPvA,18528
 ml_tools/ML_evaluation_multi.py,sha256=L6Ub_uObXsI7ToVCF6DtmAFekHRcga5wWMOnRYRR-BY,16121
 ml_tools/ML_inference.py,sha256=yq2gdN6s_OUYC5ZLQrIJC5BA5H33q8UKODXwb-_0M2c,23549
@@ -35,7 +35,7 @@ ml_tools/optimization_tools.py,sha256=TYFQ2nSnp7xxs-VyoZISWgnGJghFbsWasHjruegyJR
 ml_tools/path_manager.py,sha256=CyDU16pOKmC82jPubqJPT6EBt-u-3rGVbxyPIZCvDDY,18432
 ml_tools/serde.py,sha256=c8uDYjYry_VrLvoG4ixqDj5pij88lVn6Tu4NHcPkwDU,6943
 ml_tools/utilities.py,sha256=OcAyV1tEcYAfOWlGjRgopsjDLxU3DcI5EynzvWV4q3A,15754
-dragon_ml_toolbox-13.3.2.dist-info/METADATA,sha256=RMnB45xVa4W8DibE8KTKn-Au62avG72w_ujDIsWnZBM,6166
-dragon_ml_toolbox-13.3.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-dragon_ml_toolbox-13.3.2.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
-dragon_ml_toolbox-13.3.2.dist-info/RECORD,,
+dragon_ml_toolbox-13.4.0.dist-info/METADATA,sha256=Ixk5If3BJhjyJy9_mirNJ2QckMELXFQiJa9_8RWfreI,6166
+dragon_ml_toolbox-13.4.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+dragon_ml_toolbox-13.4.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
+dragon_ml_toolbox-13.4.0.dist-info/RECORD,,

ml_tools/ML_datasetmaster.py CHANGED Viewed

@@ -126,8 +126,8 @@ class _BaseDatasetMaker(ABC):
         else:
             _LOGGER.info("No continuous features listed in schema. Scaler will not be fitted.")
-        X_train_values = X_train.values
-        X_test_values = X_test.values
+        X_train_values = X_train.to_numpy()
+        X_test_values = X_test.to_numpy()
         # continuous_feature_indices is derived
         if self.scaler is None and continuous_feature_indices:
@@ -253,26 +253,42 @@ class DatasetMaker(_BaseDatasetMaker):
                  pandas_df: pandas.DataFrame,
                  schema: FeatureSchema,
                  kind: Literal["regression", "classification"],
+                 scaler: Union[Literal["fit"], Literal["none"], PytorchScaler],
                  test_size: float = 0.2,
-                 random_state: int = 42,
-                 scaler: Optional[PytorchScaler] = None):
+                 random_state: int = 42):
         """
         Args:
             pandas_df (pandas.DataFrame):
                 The pre-processed input DataFrame containing all columns. (features and single target).
             schema (FeatureSchema):
                 The definitive schema object from data_exploration.
-            kind (Literal["regression", "classification"]):
+            kind ("regression" | "classification"):
                 The type of ML task. This determines the data type of the labels.
+            scaler ("fit" | "none" | PytorchScaler):
+                Strategy for data scaling:
+                - "fit": Fit a new PytorchScaler on continuous features.
+                - "none": Do not scale data (e.g., for TabularTransformer).
+                - PytorchScaler instance: Use a pre-fitted scaler to transform data.
             test_size (float):
                 The proportion of the dataset to allocate to the test split.
             random_state (int):
                 The seed for the random number generator for reproducibility.
-            scaler (PytorchScaler | None):
-                A pre-fitted PytorchScaler instance, if None a new scaler will be created.
         """
         super().__init__()
-        self.scaler = scaler
+        _apply_scaling: bool = False
+        if scaler == "fit":
+            self.scaler = None # To be created
+            _apply_scaling = True
+        elif scaler == "none":
+            self.scaler = None
+        elif isinstance(scaler, PytorchScaler):
+            self.scaler = scaler # Use the provided one
+            _apply_scaling = True
+        else:
+            _LOGGER.error(f"Invalid 'scaler' argument. Must be 'fit', 'none', or a PytorchScaler instance.")
+            raise ValueError()
         # --- 1. Identify features (from schema) ---
         self._feature_names = list(schema.feature_names)
@@ -310,9 +326,14 @@ class DatasetMaker(_BaseDatasetMaker):
         label_dtype = torch.float32 if kind == "regression" else torch.int64
         # --- 4. Scale (using the schema) ---
-        X_train_final, X_test_final = self._prepare_scaler(
-            X_train, y_train, X_test, label_dtype, schema
-        )
+        if _apply_scaling:
+            X_train_final, X_test_final = self._prepare_scaler(
+                X_train, y_train, X_test, label_dtype, schema
+            )
+        else:
+            _LOGGER.info("Features have not been scaled as specified.")
+            X_train_final = X_train.to_numpy()
+            X_test_final = X_test.to_numpy()
         # --- 5. Create Datasets ---
         self._train_ds = _PytorchDataset(X_train_final, y_train, labels_dtype=label_dtype, feature_names=self._feature_names, target_names=self._target_names)
@@ -336,9 +357,9 @@ class DatasetMakerMulti(_BaseDatasetMaker):
                  pandas_df: pandas.DataFrame,
                  target_columns: List[str],
                  schema: FeatureSchema,
+                 scaler: Union[Literal["fit"], Literal["none"], PytorchScaler],
                  test_size: float = 0.2,
-                 random_state: int = 42,
-                 scaler: Optional[PytorchScaler] = None):
+                 random_state: int = 42):
         """
         Args:
             pandas_df (pandas.DataFrame):
@@ -348,20 +369,35 @@ class DatasetMakerMulti(_BaseDatasetMaker):
                 List of target column names.
             schema (FeatureSchema):
                 The definitive schema object from data_exploration.
+            scaler ("fit" | "none" | PytorchScaler):
+                Strategy for data scaling:
+                - "fit": Fit a new PytorchScaler on continuous features.
+                - "none": Do not scale data (e.g., for TabularTransformer).
+                - PytorchScaler instance: Use a pre-fitted scaler to transform data.
             test_size (float):
                 The proportion of the dataset to allocate to the test split.
             random_state (int):
                 The seed for the random number generator for reproducibility.
-            scaler (PytorchScaler | None):
-                A pre-fitted PytorchScaler instance.
         ## Note:
         For multi-binary classification, the most common PyTorch loss function is nn.BCEWithLogitsLoss.
         This loss function requires the labels to be torch.float32 which is the same type required for regression (multi-regression) tasks.
         """
         super().__init__()
-        self.scaler = scaler
+        _apply_scaling: bool = False
+        if scaler == "fit":
+            self.scaler = None
+            _apply_scaling = True
+        elif scaler == "none":
+            self.scaler = None
+        elif isinstance(scaler, PytorchScaler):
+            self.scaler = scaler # Use the provided one
+            _apply_scaling = True
+        else:
+            _LOGGER.error(f"Invalid 'scaler' argument. Must be 'fit', 'none', or a PytorchScaler instance.")
+            raise ValueError()
         # --- 1. Get features and targets from schema/args ---
         self._feature_names = list(schema.feature_names)
         self._target_names = target_columns
@@ -403,9 +439,14 @@ class DatasetMakerMulti(_BaseDatasetMaker):
         label_dtype = torch.float32
         # --- 4. Scale (using the schema) ---
-        X_train_final, X_test_final = self._prepare_scaler(
-            X_train, y_train, X_test, label_dtype, schema
-        )
+        if _apply_scaling:
+            X_train_final, X_test_final = self._prepare_scaler(
+                X_train, y_train, X_test, label_dtype, schema
+            )
+        else:
+            _LOGGER.info("Features have not been scaled as specified.")
+            X_train_final = X_train.to_numpy()
+            X_test_final = X_test.to_numpy()
         # --- 5. Create Datasets ---
         # _PytorchDataset now correctly handles y_train (a DataFrame)

{dragon_ml_toolbox-13.3.2.dist-info → dragon_ml_toolbox-13.4.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{dragon_ml_toolbox-13.3.2.dist-info → dragon_ml_toolbox-13.4.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{dragon_ml_toolbox-13.3.2.dist-info → dragon_ml_toolbox-13.4.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md RENAMED Viewed

File without changes

{dragon_ml_toolbox-13.3.2.dist-info → dragon_ml_toolbox-13.4.0.dist-info}/top_level.txt RENAMED Viewed

File without changes

dragon-ml-toolbox 13.3.2__py3-none-any.whl → 13.4.0__py3-none-any.whl

Potentially problematic release.

dragon-ml-toolbox 13.3.2__py3-none-any.whl → 13.4.0__py3-none-any.whl