dragon-ml-toolbox 7.0.0__tar.gz → 8.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dragon-ml-toolbox might be problematic. Click here for more details.

Files changed (41)
  1. {dragon_ml_toolbox-7.0.0/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-8.0.0}/PKG-INFO +2 -1
  2. {dragon_ml_toolbox-7.0.0 → dragon_ml_toolbox-8.0.0}/README.md +1 -0
  3. {dragon_ml_toolbox-7.0.0 → dragon_ml_toolbox-8.0.0/dragon_ml_toolbox.egg-info}/PKG-INFO +2 -1
  4. {dragon_ml_toolbox-7.0.0 → dragon_ml_toolbox-8.0.0}/dragon_ml_toolbox.egg-info/SOURCES.txt +2 -0
  5. {dragon_ml_toolbox-7.0.0 → dragon_ml_toolbox-8.0.0}/ml_tools/ML_datasetmaster.py +165 -116
  6. {dragon_ml_toolbox-7.0.0 → dragon_ml_toolbox-8.0.0}/ml_tools/ML_evaluation.py +5 -2
  7. dragon_ml_toolbox-8.0.0/ml_tools/ML_evaluation_multi.py +296 -0
  8. {dragon_ml_toolbox-7.0.0 → dragon_ml_toolbox-8.0.0}/ml_tools/ML_inference.py +232 -34
  9. {dragon_ml_toolbox-7.0.0 → dragon_ml_toolbox-8.0.0}/ml_tools/ML_models.py +0 -4
  10. {dragon_ml_toolbox-7.0.0 → dragon_ml_toolbox-8.0.0}/ml_tools/ML_trainer.py +168 -71
  11. dragon_ml_toolbox-8.0.0/ml_tools/_ML_optimization_multi.py +231 -0
  12. {dragon_ml_toolbox-7.0.0 → dragon_ml_toolbox-8.0.0}/pyproject.toml +1 -1
  13. {dragon_ml_toolbox-7.0.0 → dragon_ml_toolbox-8.0.0}/LICENSE +0 -0
  14. {dragon_ml_toolbox-7.0.0 → dragon_ml_toolbox-8.0.0}/LICENSE-THIRD-PARTY.md +0 -0
  15. {dragon_ml_toolbox-7.0.0 → dragon_ml_toolbox-8.0.0}/dragon_ml_toolbox.egg-info/dependency_links.txt +0 -0
  16. {dragon_ml_toolbox-7.0.0 → dragon_ml_toolbox-8.0.0}/dragon_ml_toolbox.egg-info/requires.txt +0 -0
  17. {dragon_ml_toolbox-7.0.0 → dragon_ml_toolbox-8.0.0}/dragon_ml_toolbox.egg-info/top_level.txt +0 -0
  18. {dragon_ml_toolbox-7.0.0 → dragon_ml_toolbox-8.0.0}/ml_tools/ETL_engineering.py +0 -0
  19. {dragon_ml_toolbox-7.0.0 → dragon_ml_toolbox-8.0.0}/ml_tools/GUI_tools.py +0 -0
  20. {dragon_ml_toolbox-7.0.0 → dragon_ml_toolbox-8.0.0}/ml_tools/MICE_imputation.py +0 -0
  21. {dragon_ml_toolbox-7.0.0 → dragon_ml_toolbox-8.0.0}/ml_tools/ML_callbacks.py +0 -0
  22. {dragon_ml_toolbox-7.0.0 → dragon_ml_toolbox-8.0.0}/ml_tools/ML_optimization.py +0 -0
  23. {dragon_ml_toolbox-7.0.0 → dragon_ml_toolbox-8.0.0}/ml_tools/ML_scaler.py +0 -0
  24. {dragon_ml_toolbox-7.0.0 → dragon_ml_toolbox-8.0.0}/ml_tools/PSO_optimization.py +0 -0
  25. {dragon_ml_toolbox-7.0.0 → dragon_ml_toolbox-8.0.0}/ml_tools/RNN_forecast.py +0 -0
  26. {dragon_ml_toolbox-7.0.0 → dragon_ml_toolbox-8.0.0}/ml_tools/SQL.py +0 -0
  27. {dragon_ml_toolbox-7.0.0 → dragon_ml_toolbox-8.0.0}/ml_tools/VIF_factor.py +0 -0
  28. {dragon_ml_toolbox-7.0.0 → dragon_ml_toolbox-8.0.0}/ml_tools/__init__.py +0 -0
  29. {dragon_ml_toolbox-7.0.0 → dragon_ml_toolbox-8.0.0}/ml_tools/_logger.py +0 -0
  30. {dragon_ml_toolbox-7.0.0 → dragon_ml_toolbox-8.0.0}/ml_tools/_script_info.py +0 -0
  31. {dragon_ml_toolbox-7.0.0 → dragon_ml_toolbox-8.0.0}/ml_tools/custom_logger.py +0 -0
  32. {dragon_ml_toolbox-7.0.0 → dragon_ml_toolbox-8.0.0}/ml_tools/data_exploration.py +0 -0
  33. {dragon_ml_toolbox-7.0.0 → dragon_ml_toolbox-8.0.0}/ml_tools/ensemble_evaluation.py +0 -0
  34. {dragon_ml_toolbox-7.0.0 → dragon_ml_toolbox-8.0.0}/ml_tools/ensemble_inference.py +0 -0
  35. {dragon_ml_toolbox-7.0.0 → dragon_ml_toolbox-8.0.0}/ml_tools/ensemble_learning.py +0 -0
  36. {dragon_ml_toolbox-7.0.0 → dragon_ml_toolbox-8.0.0}/ml_tools/handle_excel.py +0 -0
  37. {dragon_ml_toolbox-7.0.0 → dragon_ml_toolbox-8.0.0}/ml_tools/keys.py +0 -0
  38. {dragon_ml_toolbox-7.0.0 → dragon_ml_toolbox-8.0.0}/ml_tools/optimization_tools.py +0 -0
  39. {dragon_ml_toolbox-7.0.0 → dragon_ml_toolbox-8.0.0}/ml_tools/path_manager.py +0 -0
  40. {dragon_ml_toolbox-7.0.0 → dragon_ml_toolbox-8.0.0}/ml_tools/utilities.py +0 -0
  41. {dragon_ml_toolbox-7.0.0 → dragon_ml_toolbox-8.0.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 7.0.0
3
+ Version: 8.0.0
4
4
  Summary: A collection of tools for data science and machine learning projects.
5
5
  Author-email: Karl Loza <luigiloza@gmail.com>
6
6
  License-Expression: MIT
@@ -147,6 +147,7 @@ ensemble_learning
147
147
  ETL_engineering
148
148
  ML_callbacks
149
149
  ML_datasetmaster
150
+ ML_evaluation_multi
150
151
  ML_evaluation
151
152
  ML_inference
152
153
  ML_models
@@ -66,6 +66,7 @@ ensemble_learning
66
66
  ETL_engineering
67
67
  ML_callbacks
68
68
  ML_datasetmaster
69
+ ML_evaluation_multi
69
70
  ML_evaluation
70
71
  ML_inference
71
72
  ML_models
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 7.0.0
3
+ Version: 8.0.0
4
4
  Summary: A collection of tools for data science and machine learning projects.
5
5
  Author-email: Karl Loza <luigiloza@gmail.com>
6
6
  License-Expression: MIT
@@ -147,6 +147,7 @@ ensemble_learning
147
147
  ETL_engineering
148
148
  ML_callbacks
149
149
  ML_datasetmaster
150
+ ML_evaluation_multi
150
151
  ML_evaluation
151
152
  ML_inference
152
153
  ML_models
@@ -13,6 +13,7 @@ ml_tools/MICE_imputation.py
13
13
  ml_tools/ML_callbacks.py
14
14
  ml_tools/ML_datasetmaster.py
15
15
  ml_tools/ML_evaluation.py
16
+ ml_tools/ML_evaluation_multi.py
16
17
  ml_tools/ML_inference.py
17
18
  ml_tools/ML_models.py
18
19
  ml_tools/ML_optimization.py
@@ -22,6 +23,7 @@ ml_tools/PSO_optimization.py
22
23
  ml_tools/RNN_forecast.py
23
24
  ml_tools/SQL.py
24
25
  ml_tools/VIF_factor.py
26
+ ml_tools/_ML_optimization_multi.py
25
27
  ml_tools/__init__.py
26
28
  ml_tools/_logger.py
27
29
  ml_tools/_script_info.py
@@ -18,6 +18,7 @@ from .ML_scaler import PytorchScaler
18
18
 
19
19
  __all__ = [
20
20
  "DatasetMaker",
21
+ "DatasetMakerMulti",
21
22
  "VisionDatasetMaker",
22
23
  "SequenceMaker",
23
24
  "ResizeAspectFill",
@@ -57,71 +58,26 @@ class _PytorchDataset(Dataset):
57
58
  return self.features[index], self.labels[index]
58
59
 
59
60
 
60
- # Streamlined DatasetMaker version
61
- class DatasetMaker:
61
+ # --- Abstract Base Class (New) ---
62
+ # --- Abstract Base Class (Corrected) ---
63
+ class _BaseDatasetMaker(ABC):
62
64
  """
63
- A simplified dataset maker for pre-processed, numerical pandas DataFrames.
64
-
65
- This class takes a DataFrame, automatically splits it into training and
66
- testing sets, and converts them into PyTorch Datasets. It assumes the
67
- target variable is the last column. It can also create, apply, and
68
- save a PytorchScaler for standardizing continuous features.
69
-
70
- Attributes:
71
- `scaler` -> PytorchScaler | None
72
- `train_dataset` -> PyTorch Dataset
73
- `test_dataset` -> PyTorch Dataset
74
- `feature_names` -> list[str]
75
- `target_name` -> str
76
- `id` -> str | None
77
-
78
- The ID can be manually set to any string if needed, it is `None` by default.
65
+ Abstract base class for dataset makers. Contains shared logic for
66
+ splitting, scaling, and accessing datasets to reduce code duplication.
79
67
  """
80
- def __init__(self,
81
- pandas_df: pandas.DataFrame,
82
- kind: Literal["regression", "classification"],
83
- test_size: float = 0.2,
84
- random_state: int = 42,
85
- scaler: Optional[PytorchScaler] = None,
86
- continuous_feature_columns: Optional[Union[List[int], List[str]]] = None):
87
- """
88
- Args:
89
- pandas_df (pandas.DataFrame): The pre-processed input DataFrame with numerical data.
90
- kind (Literal["regression", "classification"]): The type of ML task. This determines the data type of the labels.
91
- test_size (float): The proportion of the dataset to allocate to the test split.
92
- random_state (int): The seed for the random number generator for reproducibility.
93
- scaler (PytorchScaler | None): A pre-fitted PytorchScaler instance.
94
- continuous_feature_columns (List[int] | List[str] | None): Column indices or names of continuous features to scale. If provided creates a new PytorchScaler.
95
- """
96
- # Validation
97
- if not isinstance(pandas_df, pandas.DataFrame):
98
- raise TypeError("Input must be a pandas.DataFrame.")
99
- if kind not in ["regression", "classification"]:
100
- raise ValueError("`kind` must be 'regression' or 'classification'.")
101
-
102
- # 1. Identify features and target
103
- features = pandas_df.iloc[:, :-1]
104
- target = pandas_df.iloc[:, -1]
105
-
106
- self._feature_names = features.columns.tolist()
107
- self._target_name = str(target.name)
108
-
109
- #set id
68
+ def __init__(self):
69
+ self._train_ds: Optional[Dataset] = None
70
+ self._test_ds: Optional[Dataset] = None
71
+ self.scaler: Optional[PytorchScaler] = None
110
72
  self._id: Optional[str] = None
111
- # set scaler
112
- self.scaler = scaler
113
-
114
- # 2. Split the data
115
- X_train, X_test, y_train, y_test = train_test_split(
116
- features, target, test_size=test_size, random_state=random_state
117
- )
118
-
119
- self._X_train_shape = X_train.shape
120
- self._X_test_shape = X_test.shape
121
- self._y_train_shape = y_train.shape
122
- self._y_test_shape = y_test.shape
123
-
124
- # 3. Handle Column to Index Conversion
73
+ self._feature_names: List[str] = []
74
+ self._X_train_shape = (0,0)
75
+ self._X_test_shape = (0,0)
76
+ self._y_train_shape = (0,)
77
+ self._y_test_shape = (0,)
78
+
79
+ def _prepare_scaler(self, X_train: pandas.DataFrame, y_train: Union[pandas.Series, pandas.DataFrame], X_test: pandas.DataFrame, label_dtype: torch.dtype, continuous_feature_columns: Optional[Union[List[int], List[str]]]):
80
+ """Internal helper to fit and apply a PytorchScaler."""
125
81
  continuous_feature_indices: Optional[List[int]] = None
126
82
  if continuous_feature_columns:
127
83
  if all(isinstance(c, str) for c in continuous_feature_columns):
@@ -129,108 +85,201 @@ class DatasetMaker:
129
85
  try:
130
86
  continuous_feature_indices = [name_to_idx[name] for name in continuous_feature_columns] # type: ignore
131
87
  except KeyError as e:
132
- raise ValueError(f"Feature column '{e.args[0]}' not found in DataFrame.")
88
+ raise ValueError(f"Feature column '{e.args[0]}' not found.")
133
89
  elif all(isinstance(c, int) for c in continuous_feature_columns):
134
90
  continuous_feature_indices = continuous_feature_columns # type: ignore
135
91
  else:
136
92
  raise TypeError("`continuous_feature_columns` must be a list of all strings or all integers.")
137
-
138
- # 4. Handle Scaling
93
+
139
94
  X_train_values = X_train.values
140
95
  X_test_values = X_test.values
141
-
142
- # If no scaler is provided, fit a new one from the training data
143
- if self.scaler is None:
144
- if continuous_feature_indices:
145
- _LOGGER.info("Feature indices provided. Fitting a new PytorchScaler on training data.")
146
- # A temporary dataset is needed for the PytorchScaler.fit method
147
- temp_label_dtype = torch.float32 if kind == "regression" else torch.int64
148
- temp_train_ds = _PytorchDataset(X_train_values, y_train.values, labels_dtype=temp_label_dtype)
149
- self.scaler = PytorchScaler.fit(temp_train_ds, continuous_feature_indices)
150
-
151
- # If a scaler exists (either passed in or just fitted), apply it
96
+
97
+ if self.scaler is None and continuous_feature_indices:
98
+ _LOGGER.info("Fitting a new PytorchScaler on training data.")
99
+ temp_train_ds = _PytorchDataset(X_train_values, y_train, label_dtype) # type: ignore
100
+ self.scaler = PytorchScaler.fit(temp_train_ds, continuous_feature_indices)
101
+
152
102
  if self.scaler and self.scaler.mean_ is not None:
153
103
  _LOGGER.info("Applying scaler transformation to train and test feature sets.")
154
104
  X_train_tensor = self.scaler.transform(torch.tensor(X_train_values, dtype=torch.float32))
155
105
  X_test_tensor = self.scaler.transform(torch.tensor(X_test_values, dtype=torch.float32))
156
- # Convert back to numpy for the _PytorchDataset class
157
- X_train_values = X_train_tensor.numpy()
158
- X_test_values = X_test_tensor.numpy()
106
+ return X_train_tensor.numpy(), X_test_tensor.numpy()
159
107
 
160
- # 5. Convert to final PyTorch Datasets
161
- label_dtype = torch.float32 if kind == "regression" else torch.int64
162
- self._train_ds = _PytorchDataset(X_train_values, y_train.values, labels_dtype=label_dtype)
163
- self._test_ds = _PytorchDataset(X_test_values, y_test.values, labels_dtype=label_dtype)
108
+ return X_train_values, X_test_values
164
109
 
165
110
  @property
166
111
  def train_dataset(self) -> Dataset:
167
- """Returns the training PyTorch dataset."""
112
+ if self._train_ds is None: raise RuntimeError("Dataset not yet created.")
168
113
  return self._train_ds
169
114
 
170
115
  @property
171
116
  def test_dataset(self) -> Dataset:
172
- """Returns the testing PyTorch dataset."""
117
+ if self._test_ds is None: raise RuntimeError("Dataset not yet created.")
173
118
  return self._test_ds
174
119
 
175
120
  @property
176
121
  def feature_names(self) -> list[str]:
177
- """Returns the list of feature column names."""
178
122
  return self._feature_names
179
123
 
180
- @property
181
- def target_name(self) -> str:
182
- """Returns the name of the target column."""
183
- return self._target_name
184
-
185
124
  @property
186
125
  def id(self) -> Optional[str]:
187
- """Returns the object identifier if any."""
188
126
  return self._id
189
-
127
+
190
128
  @id.setter
191
129
  def id(self, dataset_id: str):
192
- """Sets the ID value"""
193
- if not isinstance(dataset_id, str):
194
- raise ValueError(f"Dataset ID '{type(dataset_id)}' is not a string.")
130
+ if not isinstance(dataset_id, str): raise ValueError("ID must be a string.")
195
131
  self._id = dataset_id
196
132
 
197
133
  def dataframes_info(self) -> None:
198
- """Prints the shape information of the split pandas DataFrames."""
199
- print("--- Original DataFrame Shapes After Split ---")
200
- print(f" X_train shape: {self._X_train_shape}")
201
- print(f" y_train shape: {self._y_train_shape}\n")
202
- print(f" X_test shape: {self._X_test_shape}")
203
- print(f" y_test shape: {self._y_test_shape}")
204
- print("-------------------------------------------")
205
-
134
+ print("--- DataFrame Shapes After Split ---")
135
+ print(f" X_train shape: {self._X_train_shape}, y_train shape: {self._y_train_shape}")
136
+ print(f" X_test shape: {self._X_test_shape}, y_test shape: {self._y_test_shape}")
137
+ print("------------------------------------")
138
+
206
139
  def save_feature_names(self, directory: Union[str, Path], verbose: bool=True) -> None:
207
140
  """Saves a list of feature names as a text file"""
208
141
  save_list_strings(list_strings=self._feature_names,
209
142
  directory=directory,
210
143
  filename="feature_names",
211
- verbose=verbose)
212
-
144
+ verbose=verbose)
145
+
213
146
  def save_scaler(self, save_dir: Union[str, Path]):
214
147
  """
215
148
  Saves the fitted PytorchScaler's state to a .pth file.
216
149
 
217
- The filename is automatically generated based on the target name.
150
+ The filename is automatically generated based on the dataset id.
218
151
 
219
152
  Args:
220
153
  save_dir (str | Path): The directory where the scaler will be saved.
221
154
  """
222
- if not self.scaler:
223
- _LOGGER.error(" No scaler was fitted or provided.")
224
- return
225
-
155
+ if not self.scaler: raise RuntimeError("No scaler was fitted or provided.")
156
+ if not self.id: raise ValueError("Must set the `id` before saving scaler.")
226
157
  save_path = make_fullpath(save_dir, make=True, enforce="directory")
227
-
228
- # Sanitize the target name for use in a filename
229
- sanitized_target = sanitize_filename(self.target_name)
230
- filename = f"scaler_{sanitized_target}.pth"
231
-
158
+ sanitized_id = sanitize_filename(self.id)
159
+ filename = f"scaler_{sanitized_id}.pth"
232
160
  filepath = save_path / filename
233
161
  self.scaler.save(filepath)
162
+ _LOGGER.info(f"Scaler for dataset '{self.id}' saved to '{filepath.name}'.")
163
+
164
+
165
+ # Single target dataset
166
+ class DatasetMaker(_BaseDatasetMaker):
167
+ """
168
+ Dataset maker for pre-processed, numerical pandas DataFrames with a single target column.
169
+
170
+ This class takes a DataFrame, automatically splits it into training and
171
+ testing sets, and converts them into PyTorch Datasets. It assumes the
172
+ target variable is the last column. It can also create, apply, and
173
+ save a PytorchScaler for standardizing continuous features.
174
+
175
+ Attributes:
176
+ `scaler` -> PytorchScaler | None
177
+ `train_dataset` -> PyTorch Dataset
178
+ `test_dataset` -> PyTorch Dataset
179
+ `feature_names` -> list[str]
180
+ `target_name` -> str
181
+ `id` -> str
182
+
183
+ The ID can be manually set to any string if needed, it is the target name by default.
184
+ """
185
+ def __init__(self,
186
+ pandas_df: pandas.DataFrame,
187
+ kind: Literal["regression", "classification"],
188
+ test_size: float = 0.2,
189
+ random_state: int = 42,
190
+ scaler: Optional[PytorchScaler] = None,
191
+ continuous_feature_columns: Optional[Union[List[int], List[str]]] = None):
192
+ """
193
+ Args:
194
+ pandas_df (pandas.DataFrame): The pre-processed input DataFrame with numerical data.
195
+ kind (Literal["regression", "classification"]): The type of ML task. This determines the data type of the labels.
196
+ test_size (float): The proportion of the dataset to allocate to the test split.
197
+ random_state (int): The seed for the random number generator for reproducibility.
198
+ scaler (PytorchScaler | None): A pre-fitted PytorchScaler instance.
199
+ continuous_feature_columns (List[int] | List[str] | None): Column indices or names of continuous features to scale. If provided creates a new PytorchScaler.
200
+ """
201
+ super().__init__()
202
+ self.scaler = scaler
203
+
204
+ # --- 1. Identify features and target (single-target logic) ---
205
+ features = pandas_df.iloc[:, :-1]
206
+ target = pandas_df.iloc[:, -1]
207
+ self._feature_names = features.columns.tolist()
208
+ self._target_name = str(target.name)
209
+ self._id = self._target_name
210
+
211
+ # --- 2. Split ---
212
+ X_train, X_test, y_train, y_test = train_test_split(
213
+ features, target, test_size=test_size, random_state=random_state
214
+ )
215
+ self._X_train_shape, self._X_test_shape = X_train.shape, X_test.shape
216
+ self._y_train_shape, self._y_test_shape = y_train.shape, y_test.shape
217
+
218
+ label_dtype = torch.float32 if kind == "regression" else torch.int64
219
+
220
+ # --- 3. Scale ---
221
+ X_train_final, X_test_final = self._prepare_scaler(
222
+ X_train, y_train, X_test, label_dtype, continuous_feature_columns
223
+ )
224
+
225
+ # --- 4. Create Datasets ---
226
+ self._train_ds = _PytorchDataset(X_train_final, y_train.values, label_dtype)
227
+ self._test_ds = _PytorchDataset(X_test_final, y_test.values, label_dtype)
228
+
229
+ @property
230
+ def target_name(self) -> str:
231
+ return self._target_name
232
+
233
+
234
+ # --- New Multi-Target Class ---
235
+ class DatasetMakerMulti(_BaseDatasetMaker):
236
+ """
237
+ Dataset maker for pre-processed, numerical pandas DataFrames with a multiple target columns.
238
+
239
+ This class takes a DataFrame, automatically splits it into training and testing sets, and converts them into PyTorch Datasets.
240
+ """
241
+ def __init__(self,
242
+ pandas_df: pandas.DataFrame,
243
+ target_columns: List[str],
244
+ test_size: float = 0.2,
245
+ random_state: int = 42,
246
+ scaler: Optional[PytorchScaler] = None,
247
+ continuous_feature_columns: Optional[Union[List[int], List[str]]] = None):
248
+ """
249
+ Args:
250
+ pandas_df (pandas.DataFrame): The pre-processed input DataFrame with numerical data.
251
+ target_columns (list[str]): List of target column names.
252
+ test_size (float): The proportion of the dataset to allocate to the test split.
253
+ random_state (int): The seed for the random number generator for reproducibility.
254
+ scaler (PytorchScaler | None): A pre-fitted PytorchScaler instance.
255
+ continuous_feature_columns (List[int] | List[str] | None): Column indices or names of continuous features to scale. If provided creates a new PytorchScaler.
256
+ """
257
+ super().__init__()
258
+ self.scaler = scaler
259
+
260
+ self._target_names = target_columns
261
+ self._feature_names = [col for col in pandas_df.columns if col not in target_columns]
262
+ features = pandas_df[self._feature_names]
263
+ target = pandas_df[self._target_names]
264
+
265
+ X_train, X_test, y_train, y_test = train_test_split(
266
+ features, target, test_size=test_size, random_state=random_state
267
+ )
268
+ self._X_train_shape, self._X_test_shape = X_train.shape, X_test.shape
269
+ self._y_train_shape, self._y_test_shape = y_train.shape, y_test.shape
270
+
271
+ label_dtype = torch.float32
272
+
273
+ X_train_final, X_test_final = self._prepare_scaler(
274
+ X_train, y_train, X_test, label_dtype, continuous_feature_columns
275
+ )
276
+
277
+ self._train_ds = _PytorchDataset(X_train_final, y_train, label_dtype)
278
+ self._test_ds = _PytorchDataset(X_test_final, y_test, label_dtype)
279
+
280
+ @property
281
+ def target_names(self) -> list[str]:
282
+ return self._target_names
234
283
 
235
284
 
236
285
  # --- Private Base Class ---
@@ -249,8 +249,11 @@ def regression_metrics(y_true: np.ndarray, y_pred: np.ndarray, save_dir: Union[s
249
249
  plt.close(fig_hist)
250
250
 
251
251
 
252
- def shap_summary_plot(model, background_data: Union[torch.Tensor,np.ndarray], instances_to_explain: Union[torch.Tensor,np.ndarray],
253
- feature_names: Optional[list[str]], save_dir: Union[str, Path]):
252
+ def shap_summary_plot(model,
253
+ background_data: Union[torch.Tensor,np.ndarray],
254
+ instances_to_explain: Union[torch.Tensor,np.ndarray],
255
+ feature_names: Optional[list[str]],
256
+ save_dir: Union[str, Path]):
254
257
  """
255
258
  Calculates SHAP values and saves summary plots and data.
256
259