dragon-ml-toolbox 5.2.0__tar.gz → 5.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dragon_ml_toolbox-5.2.0/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-5.2.2}/PKG-INFO +1 -1
- {dragon_ml_toolbox-5.2.0 → dragon_ml_toolbox-5.2.2/dragon_ml_toolbox.egg-info}/PKG-INFO +1 -1
- {dragon_ml_toolbox-5.2.0 → dragon_ml_toolbox-5.2.2}/ml_tools/ML_callbacks.py +11 -10
- {dragon_ml_toolbox-5.2.0 → dragon_ml_toolbox-5.2.2}/ml_tools/ML_datasetmaster.py +15 -7
- {dragon_ml_toolbox-5.2.0 → dragon_ml_toolbox-5.2.2}/ml_tools/ML_models.py +23 -0
- {dragon_ml_toolbox-5.2.0 → dragon_ml_toolbox-5.2.2}/ml_tools/ML_optimization.py +7 -18
- {dragon_ml_toolbox-5.2.0 → dragon_ml_toolbox-5.2.2}/ml_tools/ML_trainer.py +2 -0
- {dragon_ml_toolbox-5.2.0 → dragon_ml_toolbox-5.2.2}/pyproject.toml +1 -1
- {dragon_ml_toolbox-5.2.0 → dragon_ml_toolbox-5.2.2}/LICENSE +0 -0
- {dragon_ml_toolbox-5.2.0 → dragon_ml_toolbox-5.2.2}/LICENSE-THIRD-PARTY.md +0 -0
- {dragon_ml_toolbox-5.2.0 → dragon_ml_toolbox-5.2.2}/README.md +0 -0
- {dragon_ml_toolbox-5.2.0 → dragon_ml_toolbox-5.2.2}/dragon_ml_toolbox.egg-info/SOURCES.txt +0 -0
- {dragon_ml_toolbox-5.2.0 → dragon_ml_toolbox-5.2.2}/dragon_ml_toolbox.egg-info/dependency_links.txt +0 -0
- {dragon_ml_toolbox-5.2.0 → dragon_ml_toolbox-5.2.2}/dragon_ml_toolbox.egg-info/requires.txt +0 -0
- {dragon_ml_toolbox-5.2.0 → dragon_ml_toolbox-5.2.2}/dragon_ml_toolbox.egg-info/top_level.txt +0 -0
- {dragon_ml_toolbox-5.2.0 → dragon_ml_toolbox-5.2.2}/ml_tools/ETL_engineering.py +0 -0
- {dragon_ml_toolbox-5.2.0 → dragon_ml_toolbox-5.2.2}/ml_tools/GUI_tools.py +0 -0
- {dragon_ml_toolbox-5.2.0 → dragon_ml_toolbox-5.2.2}/ml_tools/MICE_imputation.py +0 -0
- {dragon_ml_toolbox-5.2.0 → dragon_ml_toolbox-5.2.2}/ml_tools/ML_evaluation.py +0 -0
- {dragon_ml_toolbox-5.2.0 → dragon_ml_toolbox-5.2.2}/ml_tools/ML_inference.py +0 -0
- {dragon_ml_toolbox-5.2.0 → dragon_ml_toolbox-5.2.2}/ml_tools/PSO_optimization.py +0 -0
- {dragon_ml_toolbox-5.2.0 → dragon_ml_toolbox-5.2.2}/ml_tools/RNN_forecast.py +0 -0
- {dragon_ml_toolbox-5.2.0 → dragon_ml_toolbox-5.2.2}/ml_tools/SQL.py +0 -0
- {dragon_ml_toolbox-5.2.0 → dragon_ml_toolbox-5.2.2}/ml_tools/VIF_factor.py +0 -0
- {dragon_ml_toolbox-5.2.0 → dragon_ml_toolbox-5.2.2}/ml_tools/__init__.py +0 -0
- {dragon_ml_toolbox-5.2.0 → dragon_ml_toolbox-5.2.2}/ml_tools/_logger.py +0 -0
- {dragon_ml_toolbox-5.2.0 → dragon_ml_toolbox-5.2.2}/ml_tools/_script_info.py +0 -0
- {dragon_ml_toolbox-5.2.0 → dragon_ml_toolbox-5.2.2}/ml_tools/custom_logger.py +0 -0
- {dragon_ml_toolbox-5.2.0 → dragon_ml_toolbox-5.2.2}/ml_tools/data_exploration.py +0 -0
- {dragon_ml_toolbox-5.2.0 → dragon_ml_toolbox-5.2.2}/ml_tools/ensemble_inference.py +0 -0
- {dragon_ml_toolbox-5.2.0 → dragon_ml_toolbox-5.2.2}/ml_tools/ensemble_learning.py +0 -0
- {dragon_ml_toolbox-5.2.0 → dragon_ml_toolbox-5.2.2}/ml_tools/handle_excel.py +0 -0
- {dragon_ml_toolbox-5.2.0 → dragon_ml_toolbox-5.2.2}/ml_tools/keys.py +0 -0
- {dragon_ml_toolbox-5.2.0 → dragon_ml_toolbox-5.2.2}/ml_tools/optimization_tools.py +0 -0
- {dragon_ml_toolbox-5.2.0 → dragon_ml_toolbox-5.2.2}/ml_tools/path_manager.py +0 -0
- {dragon_ml_toolbox-5.2.0 → dragon_ml_toolbox-5.2.2}/ml_tools/utilities.py +0 -0
- {dragon_ml_toolbox-5.2.0 → dragon_ml_toolbox-5.2.2}/setup.cfg +0 -0
{dragon_ml_toolbox-5.2.0 → dragon_ml_toolbox-5.2.2}/ml_tools/ML_callbacks.py CHANGED

```diff
@@ -6,6 +6,8 @@ from .keys import LogKeys
 from ._logger import _LOGGER
 from typing import Optional
 from ._script_info import _script_info
+from typing import Union, Literal
+from pathlib import Path
 
 
 __all__ = [
```
```diff
@@ -122,7 +124,7 @@ class EarlyStopping(Callback):
             inferred from the name of the monitored quantity.
         verbose (int): Verbosity mode.
     """
-    def __init__(self, monitor: str=LogKeys.VAL_LOSS, min_delta=0.0, patience=3, mode='auto', verbose=1):
+    def __init__(self, monitor: str=LogKeys.VAL_LOSS, min_delta=0.0, patience=3, mode: Literal['auto', 'min', 'max']='auto', verbose: int=1):
         super().__init__()
         self.monitor = monitor
         self.patience = patience
```
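The tighter annotation means a type checker can now reject an invalid `mode` at the call site. A minimal usage sketch, assuming the module path shown in the file list above and that `LogKeys.VAL_LOSS` resolves to the string `"val_loss"` (not confirmed by this diff):

```python
from ml_tools.ML_callbacks import EarlyStopping

# mode is now Literal['auto', 'min', 'max'], so a typo such as
# mode='minimum' is flagged by static type checkers before runtime.
early_stop = EarlyStopping(
    monitor="val_loss",  # assumption: the string behind LogKeys.VAL_LOSS
    min_delta=0.001,     # smallest change that counts as an improvement
    patience=5,          # epochs without improvement before stopping
    mode="min",          # lower is better for a loss metric
    verbose=1,
)
```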
```diff
@@ -146,13 +148,13 @@ class EarlyStopping(Callback):
         else: # Default to min mode for loss or other metrics
             self.monitor_op = np.less
 
-        self.best = np.Inf if self.monitor_op == np.less else -np.Inf
+        self.best = np.Inf if self.monitor_op == np.less else -np.Inf # type: ignore
 
     def on_train_begin(self, logs=None):
         # Reset state at the beginning of training
         self.wait = 0
         self.stopped_epoch = 0
-        self.best = np.Inf if self.monitor_op == np.less else -np.Inf
+        self.best = np.Inf if self.monitor_op == np.less else -np.Inf # type: ignore
 
     def on_epoch_end(self, epoch, logs=None):
         current = logs.get(self.monitor) # type: ignore
```
```diff
@@ -199,10 +201,10 @@ class ModelCheckpoint(Callback):
         mode (str): One of {'auto', 'min', 'max'}.
         verbose (int): Verbosity mode.
     """
-    def __init__(self, save_dir: str, monitor: str = LogKeys.VAL_LOSS,
-                 save_best_only: bool = False, mode:
+    def __init__(self, save_dir: Union[str,Path], monitor: str = LogKeys.VAL_LOSS,
+                 save_best_only: bool = False, mode: Literal['auto', 'min', 'max']= 'auto', verbose: int = 1):
         super().__init__()
-        self.save_dir = make_fullpath(save_dir, make=True)
+        self.save_dir = make_fullpath(save_dir, make=True, enforce="directory")
         if not self.save_dir.is_dir():
             _LOGGER.error(f"{save_dir} is not a valid directory.")
             raise IOError()
```
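Since `save_dir` is now `Union[str, Path]`, the checkpoint directory can be assembled with `pathlib`. A sketch under the same assumptions as the `EarlyStopping` example above:

```python
from pathlib import Path
from ml_tools.ML_callbacks import ModelCheckpoint

# Either a str or a pathlib.Path is accepted in 5.2.2; the directory is
# resolved and created up front by make_fullpath(..., enforce="directory").
checkpoint = ModelCheckpoint(
    save_dir=Path("runs") / "experiment_01",
    monitor="val_loss",   # assumption: the string behind LogKeys.VAL_LOSS
    save_best_only=True,  # keep only the best model seen so far
    mode="min",
    verbose=1,
)
```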
```diff
@@ -226,17 +228,16 @@ class ModelCheckpoint(Callback):
         else:
             self.monitor_op = np.less if 'loss' in self.monitor else np.greater
 
-        self.best = np.Inf if self.monitor_op == np.less else -np.Inf
+        self.best = np.Inf if self.monitor_op == np.less else -np.Inf # type: ignore
 
     def on_train_begin(self, logs=None):
         """Reset state when training starts."""
-        self.best = np.Inf if self.monitor_op == np.less else -np.Inf
+        self.best = np.Inf if self.monitor_op == np.less else -np.Inf # type: ignore
         self.saved_checkpoints = []
         self.last_best_filepath = None
 
     def on_epoch_end(self, epoch, logs=None):
         logs = logs or {}
-        self.save_dir.mkdir(parents=True, exist_ok=True)
 
         if self.save_best_only:
             self._save_best_model(epoch, logs)
```
```diff
@@ -250,7 +251,7 @@ class ModelCheckpoint(Callback):
             return
 
         if self.monitor_op(current, self.best):
-            old_best_str = f"{self.best:.4f}" if self.best not in [np.Inf, -np.Inf] else "inf"
+            old_best_str = f"{self.best:.4f}" if self.best not in [np.Inf, -np.Inf] else "inf" # type: ignore
 
             # Create a descriptive filename
             filename = f"epoch_{epoch}-{self.monitor}_{current:.4f}.pth"
```
{dragon_ml_toolbox-5.2.0 → dragon_ml_toolbox-5.2.2}/ml_tools/ML_datasetmaster.py CHANGED

```diff
@@ -386,23 +386,21 @@ class SimpleDatasetMaker:
             test split.
         random_state (int): The seed for the random number generator for
             reproducibility.
-        id (str | None): An optional object identifier.
     """
-    def __init__(self, pandas_df: pandas.DataFrame, test_size: float = 0.2, random_state: int = 42
-        """
+    def __init__(self, pandas_df: pandas.DataFrame, test_size: float = 0.2, random_state: int = 42):
+        """
         Attributes:
             `train_dataset` -> PyTorch Dataset
             `test_dataset` -> PyTorch Dataset
             `feature_names` -> list[str]
             `target_name` -> str
             `id` -> str | None
+
+        The ID can be manually set to any string if needed, it is `None` by default.
         """
 
         if not isinstance(pandas_df, pandas.DataFrame):
-            raise TypeError("Input must be a pandas.DataFrame.")
-
-        #set id
-        self._id = id
+            raise TypeError("Input must be a pandas.DataFrame.")
 
         # 1. Identify features and target
         features = pandas_df.iloc[:, :-1]
```
```diff
@@ -410,6 +408,9 @@ class SimpleDatasetMaker:
 
         self._feature_names = features.columns.tolist()
         self._target_name = target.name
+
+        #set id
+        self._id: Optional[str] = None
 
         # 2. Split the data
         X_train, X_test, y_train, y_test = train_test_split(
```
```diff
@@ -449,6 +450,13 @@ class SimpleDatasetMaker:
     def id(self) -> Optional[str]:
         """Returns the object identifier if any."""
         return self._id
+
+    @id.setter
+    def id(self, dataset_id: str):
+        """Sets the ID value"""
+        if not isinstance(dataset_id, str):
+            raise ValueError(f"Dataset ID '{type(dataset_id)}' is not a string.")
+        self._id = dataset_id
 
     def dataframes_info(self) -> None:
         """Prints the shape information of the split pandas DataFrames."""
```
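A short sketch of the new setter on a toy DataFrame (the last column is treated as the target, per the `features = pandas_df.iloc[:, :-1]` line above); non-string IDs now raise a `ValueError`:

```python
import pandas
from ml_tools.ML_datasetmaster import SimpleDatasetMaker

df = pandas.DataFrame({"x1": [1, 2, 3, 4], "x2": [4, 3, 2, 1], "y": [0, 1, 0, 1]})
maker = SimpleDatasetMaker(df, test_size=0.25, random_state=42)

print(maker.id)       # None by default in 5.2.2
maker.id = "run_042"  # validated by the new @id.setter
# maker.id = 42       # would raise ValueError: not a string
```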
{dragon_ml_toolbox-5.2.0 → dragon_ml_toolbox-5.2.2}/ml_tools/ML_models.py CHANGED

```diff
@@ -66,6 +66,21 @@ class MultilayerPerceptron(nn.Module):
     def forward(self, x: torch.Tensor) -> torch.Tensor:
         """Defines the forward pass of the model."""
         return self._layers(x)
+
+    def __repr__(self) -> str:
+        """Returns the developer-friendly string representation of the model."""
+        # Extracts the number of neurons from each nn.Linear layer
+        layer_sizes = [layer.in_features for layer in self._layers if isinstance(layer, nn.Linear)]
+
+        # Get the last layer and check its type before accessing the attribute
+        last_layer = self._layers[-1]
+        if isinstance(last_layer, nn.Linear):
+            layer_sizes.append(last_layer.out_features)
+
+        # Creates a string like: 10 -> 40 -> 80 -> 40 -> 2
+        arch_str = ' -> '.join(map(str, layer_sizes))
+
+        return f"MultilayerPerceptron(arch: {arch_str})"
 
 
 class SequencePredictorLSTM(nn.Module):
```
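The new `__repr__` walks `self._layers`, collecting `in_features` from every `nn.Linear` plus the `out_features` of the final layer. A standalone rerun of that logic on a hypothetical stack matching the `10 -> 40 -> 80 -> 40 -> 2` comment in the diff:

```python
import torch.nn as nn

# Hypothetical layer stack standing in for the model's self._layers.
layers = nn.Sequential(
    nn.Linear(10, 40), nn.ReLU(),
    nn.Linear(40, 80), nn.ReLU(),
    nn.Linear(80, 40), nn.ReLU(),
    nn.Linear(40, 2),
)
# Same extraction the added __repr__ performs.
sizes = [m.in_features for m in layers if isinstance(m, nn.Linear)]
last = layers[-1]
if isinstance(last, nn.Linear):
    sizes.append(last.out_features)
print(' -> '.join(map(str, sizes)))  # 10 -> 40 -> 80 -> 40 -> 2
```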
```diff
@@ -128,6 +143,14 @@ class SequencePredictorLSTM(nn.Module):
         predictions = self.linear(lstm_out)
 
         return predictions
+
+    def __repr__(self) -> str:
+        """Returns the developer-friendly string representation of the model."""
+        return (
+            f"SequencePredictorLSTM(features={self.lstm.input_size}, "
+            f"hidden_size={self.lstm.hidden_size}, "
+            f"recurrent_layers={self.lstm.num_layers})"
+        )
 
 
 def info():
```
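`input_size`, `hidden_size`, and `num_layers` are standard attributes of `torch.nn.LSTM`, so the same lookup can be reproduced on a bare LSTM module:

```python
import torch.nn as nn

lstm = nn.LSTM(input_size=8, hidden_size=64, num_layers=2, batch_first=True)
print(f"features={lstm.input_size}, hidden_size={lstm.hidden_size}, "
      f"recurrent_layers={lstm.num_layers}")
# features=8, hidden_size=64, recurrent_layers=2
```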
{dragon_ml_toolbox-5.2.0 → dragon_ml_toolbox-5.2.2}/ml_tools/ML_optimization.py CHANGED

```diff
@@ -1,5 +1,5 @@
 import torch
-import numpy
+import numpy #handling torch to numpy
 import evotorch
 from evotorch.algorithms import CMAES, SteadyStateGA
 from evotorch.logging import StdOutLogger
```
```diff
@@ -32,7 +32,7 @@ def create_pytorch_problem(
     algorithm: Literal["CMAES", "GA"] = "CMAES",
     verbose: bool = False,
     **searcher_kwargs
-) -> Tuple[evotorch.Problem, evotorch.Searcher]:
+) -> Tuple[evotorch.Problem, evotorch.Searcher]: # type: ignore
     """
     Creates and configures an EvoTorch Problem and Searcher for a PyTorch model.
 
```
```diff
@@ -62,22 +62,11 @@ def create_pytorch_problem(
     device = handler.device
 
     # Define the fitness function that EvoTorch will call.
-    @evotorch.decorators.to_tensor
+    @evotorch.decorators.to_tensor # type: ignore
     @evotorch.decorators.on_aux_device(device)
     def fitness_func(solution_tensor: torch.Tensor) -> torch.Tensor:
-        #
-
-
-        # Apply thresholding if binary features are present
-        if binary_features > 0:
-            # Isolate the binary part of the tensor (the last n columns)
-            binary_part = processed_tensor[:, -binary_features:]
-
-            # Apply rounding to snap values to 0.0 or 1.0
-            processed_tensor[:, -binary_features:] = torch.round(binary_part)
-
-        # Use the processed tensor (with thresholded values) for prediction
-        predictions = handler.predict_batch(processed_tensor)[PyTorchInferenceKeys.PREDICTIONS]
+        # Directly use the continuous-valued tensor from the optimizer for prediction
+        predictions = handler.predict_batch(solution_tensor)[PyTorchInferenceKeys.PREDICTIONS]
         return predictions.flatten()
 
     # Create the Problem instance.
```
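For reference, the deleted block snapped the trailing `binary_features` columns of each candidate batch to hard 0/1 values before prediction; 5.2.2 instead passes the optimizer's continuous tensor straight to `predict_batch`. A standalone sketch of what the removed rounding did:

```python
import torch

binary_features = 2
batch = torch.tensor([[0.3, 1.7, 0.4, 0.9],
                      [1.1, 0.2, 0.6, 0.1]])

# The 5.2.0 fitness function rounded only the last n columns:
thresholded = batch.clone()
thresholded[:, -binary_features:] = torch.round(thresholded[:, -binary_features:])
print(thresholded)
# tensor([[0.3000, 1.7000, 0.0000, 1.0000],
#         [1.1000, 0.2000, 1.0000, 0.0000]])
```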
```diff
@@ -107,7 +96,7 @@ def create_pytorch_problem(
 
 def run_optimization(
     problem: evotorch.Problem,
-    searcher: evotorch.Searcher,
+    searcher: evotorch.Searcher, # type: ignore
     num_generations: int,
     target_name: str,
     binary_features: int,
```
```diff
@@ -168,7 +157,7 @@ def run_optimization(
 
     # preprocess feature names
     if feature_names is None:
-        feature_names = [f"feature_{i}" for i in range(problem.solution_length)]
+        feature_names = [f"feature_{i}" for i in range(problem.solution_length)] # type: ignore
 
     # --- SINGLE RUN LOGIC ---
     if repetitions <= 1:
```
{dragon_ml_toolbox-5.2.0 → dragon_ml_toolbox-5.2.2}/ml_tools/ML_trainer.py CHANGED

```diff
@@ -108,6 +108,8 @@ class MyTrainer:
     def fit(self, epochs: int = 10, batch_size: int = 10, shuffle: bool = True):
         """
         Starts the training-validation process of the model.
+
+        Returns the "History" callback dictionary.
 
         Args:
             epochs (int): The total number of epochs to train for.
```
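With the return value now documented, the training curves can be consumed directly. A hedged sketch, assuming an already-configured `MyTrainer` instance and that the History dictionary maps metric names (e.g. `train_loss`, `val_loss`) to per-epoch lists, which this diff does not confirm:

```python
# `trainer` is a hypothetical, already-configured MyTrainer instance.
history = trainer.fit(epochs=20, batch_size=32, shuffle=True)

# Assumption: keys follow the LogKeys names used by the callbacks above.
for epoch, (tr, va) in enumerate(zip(history["train_loss"], history["val_loss"]), start=1):
    print(f"epoch {epoch:02d}  train_loss={tr:.4f}  val_loss={va:.4f}")
```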