dragon-ml-toolbox 10.4.2__tar.gz → 10.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dragon_ml_toolbox-10.4.2/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-10.6.0}/PKG-INFO +1 -1
- {dragon_ml_toolbox-10.4.2 → dragon_ml_toolbox-10.6.0/dragon_ml_toolbox.egg-info}/PKG-INFO +1 -1
- {dragon_ml_toolbox-10.4.2 → dragon_ml_toolbox-10.6.0}/ml_tools/ML_models.py +63 -81
- {dragon_ml_toolbox-10.4.2 → dragon_ml_toolbox-10.6.0}/ml_tools/data_exploration.py +37 -0
- {dragon_ml_toolbox-10.4.2 → dragon_ml_toolbox-10.6.0}/ml_tools/keys.py +7 -0
- {dragon_ml_toolbox-10.4.2 → dragon_ml_toolbox-10.6.0}/pyproject.toml +1 -1
- {dragon_ml_toolbox-10.4.2 → dragon_ml_toolbox-10.6.0}/LICENSE +0 -0
- {dragon_ml_toolbox-10.4.2 → dragon_ml_toolbox-10.6.0}/LICENSE-THIRD-PARTY.md +0 -0
- {dragon_ml_toolbox-10.4.2 → dragon_ml_toolbox-10.6.0}/README.md +0 -0
- {dragon_ml_toolbox-10.4.2 → dragon_ml_toolbox-10.6.0}/dragon_ml_toolbox.egg-info/SOURCES.txt +0 -0
- {dragon_ml_toolbox-10.4.2 → dragon_ml_toolbox-10.6.0}/dragon_ml_toolbox.egg-info/dependency_links.txt +0 -0
- {dragon_ml_toolbox-10.4.2 → dragon_ml_toolbox-10.6.0}/dragon_ml_toolbox.egg-info/requires.txt +0 -0
- {dragon_ml_toolbox-10.4.2 → dragon_ml_toolbox-10.6.0}/dragon_ml_toolbox.egg-info/top_level.txt +0 -0
- {dragon_ml_toolbox-10.4.2 → dragon_ml_toolbox-10.6.0}/ml_tools/ETL_cleaning.py +0 -0
- {dragon_ml_toolbox-10.4.2 → dragon_ml_toolbox-10.6.0}/ml_tools/ETL_engineering.py +0 -0
- {dragon_ml_toolbox-10.4.2 → dragon_ml_toolbox-10.6.0}/ml_tools/GUI_tools.py +0 -0
- {dragon_ml_toolbox-10.4.2 → dragon_ml_toolbox-10.6.0}/ml_tools/MICE_imputation.py +0 -0
- {dragon_ml_toolbox-10.4.2 → dragon_ml_toolbox-10.6.0}/ml_tools/ML_callbacks.py +0 -0
- {dragon_ml_toolbox-10.4.2 → dragon_ml_toolbox-10.6.0}/ml_tools/ML_datasetmaster.py +0 -0
- {dragon_ml_toolbox-10.4.2 → dragon_ml_toolbox-10.6.0}/ml_tools/ML_evaluation.py +0 -0
- {dragon_ml_toolbox-10.4.2 → dragon_ml_toolbox-10.6.0}/ml_tools/ML_evaluation_multi.py +0 -0
- {dragon_ml_toolbox-10.4.2 → dragon_ml_toolbox-10.6.0}/ml_tools/ML_inference.py +0 -0
- {dragon_ml_toolbox-10.4.2 → dragon_ml_toolbox-10.6.0}/ml_tools/ML_optimization.py +0 -0
- {dragon_ml_toolbox-10.4.2 → dragon_ml_toolbox-10.6.0}/ml_tools/ML_scaler.py +0 -0
- {dragon_ml_toolbox-10.4.2 → dragon_ml_toolbox-10.6.0}/ml_tools/ML_trainer.py +0 -0
- {dragon_ml_toolbox-10.4.2 → dragon_ml_toolbox-10.6.0}/ml_tools/PSO_optimization.py +0 -0
- {dragon_ml_toolbox-10.4.2 → dragon_ml_toolbox-10.6.0}/ml_tools/RNN_forecast.py +0 -0
- {dragon_ml_toolbox-10.4.2 → dragon_ml_toolbox-10.6.0}/ml_tools/SQL.py +0 -0
- {dragon_ml_toolbox-10.4.2 → dragon_ml_toolbox-10.6.0}/ml_tools/VIF_factor.py +0 -0
- {dragon_ml_toolbox-10.4.2 → dragon_ml_toolbox-10.6.0}/ml_tools/__init__.py +0 -0
- {dragon_ml_toolbox-10.4.2 → dragon_ml_toolbox-10.6.0}/ml_tools/_logger.py +0 -0
- {dragon_ml_toolbox-10.4.2 → dragon_ml_toolbox-10.6.0}/ml_tools/_script_info.py +0 -0
- {dragon_ml_toolbox-10.4.2 → dragon_ml_toolbox-10.6.0}/ml_tools/custom_logger.py +0 -0
- {dragon_ml_toolbox-10.4.2 → dragon_ml_toolbox-10.6.0}/ml_tools/ensemble_evaluation.py +0 -0
- {dragon_ml_toolbox-10.4.2 → dragon_ml_toolbox-10.6.0}/ml_tools/ensemble_inference.py +0 -0
- {dragon_ml_toolbox-10.4.2 → dragon_ml_toolbox-10.6.0}/ml_tools/ensemble_learning.py +0 -0
- {dragon_ml_toolbox-10.4.2 → dragon_ml_toolbox-10.6.0}/ml_tools/handle_excel.py +0 -0
- {dragon_ml_toolbox-10.4.2 → dragon_ml_toolbox-10.6.0}/ml_tools/optimization_tools.py +0 -0
- {dragon_ml_toolbox-10.4.2 → dragon_ml_toolbox-10.6.0}/ml_tools/path_manager.py +0 -0
- {dragon_ml_toolbox-10.4.2 → dragon_ml_toolbox-10.6.0}/ml_tools/utilities.py +0 -0
- {dragon_ml_toolbox-10.4.2 → dragon_ml_toolbox-10.6.0}/setup.cfg +0 -0
|
@@ -6,6 +6,8 @@ import json
|
|
|
6
6
|
from ._logger import _LOGGER
|
|
7
7
|
from .path_manager import make_fullpath
|
|
8
8
|
from ._script_info import _script_info
|
|
9
|
+
from .keys import PytorchModelKeys
|
|
10
|
+
|
|
9
11
|
|
|
10
12
|
__all__ = [
|
|
11
13
|
"MultilayerPerceptron",
|
|
@@ -13,12 +15,63 @@ __all__ = [
|
|
|
13
15
|
"MultiHeadAttentionMLP",
|
|
14
16
|
"TabularTransformer",
|
|
15
17
|
"SequencePredictorLSTM",
|
|
16
|
-
"save_architecture",
|
|
17
|
-
"load_architecture"
|
|
18
18
|
]
|
|
19
19
|
|
|
20
20
|
|
|
21
|
-
class _BaseMLP(nn.Module):
|
|
21
|
+
class _ArchitectureHandlerMixin:
    """
    A mixin class to provide save and load functionality for model architectures.

    The host class must implement `get_architecture_config()`, returning a
    JSON-serializable dictionary of the keyword arguments needed to build the
    model again. Only the class name and that dictionary are written; nothing
    else about the model is persisted by this mixin.
    """
    def save(self: nn.Module, directory: Union[str, Path], verbose: bool = True): # type: ignore
        """
        Saves the model's architecture to a JSON file.

        Args:
            directory (str | Path): Directory that will receive the JSON file
                (file name taken from `PytorchModelKeys.SAVENAME`).
            verbose (bool): If True, logs a confirmation message.

        Raises:
            AttributeError: If the model has no `get_architecture_config()` method.
        """
        if not hasattr(self, 'get_architecture_config'):
            # Carry the reason in the exception as well as in the log, so callers
            # that catch or re-raise it do not lose the explanation.
            message = f"Model '{self.__class__.__name__}' must have a 'get_architecture_config()' method to use this functionality."
            _LOGGER.error(message)
            raise AttributeError(message)

        # NOTE(review): make_fullpath(..., make=True) presumably creates the directory - confirm in path_manager.
        path_dir = make_fullpath(directory, make=True, enforce="directory")
        full_path = path_dir / PytorchModelKeys.SAVENAME

        config = {
            PytorchModelKeys.MODEL: self.__class__.__name__,
            PytorchModelKeys.CONFIG: self.get_architecture_config() # type: ignore
        }

        # Explicit encoding keeps the file independent of the platform default.
        with open(full_path, 'w', encoding='utf-8') as f:
            json.dump(config, f, indent=4)

        if verbose:
            _LOGGER.info(f"Architecture for '{self.__class__.__name__}' saved to '{path_dir.name}'")

    @classmethod
    def load(cls: type, file_or_dir: Union[str, Path], verbose: bool = True) -> nn.Module:
        """
        Loads a model architecture from a JSON file. If a directory is provided, the function will attempt to load a JSON file inside.

        Args:
            file_or_dir (str | Path): Path to the JSON file, or to a directory
                containing a file named `PytorchModelKeys.SAVENAME`.
            verbose (bool): If True, logs a confirmation message.

        Returns:
            nn.Module: A new instance created as `cls(**config)`.

        Raises:
            IOError: If the path is neither an existing file nor a directory.
            KeyError: If the JSON file lacks the model-class or config entry.
            ValueError: If the class name stored in the file differs from `cls.__name__`.
        """
        user_path = make_fullpath(file_or_dir)

        if user_path.is_dir():
            target_path = make_fullpath(user_path / PytorchModelKeys.SAVENAME, enforce="file")
        elif user_path.is_file():
            target_path = user_path
        else:
            message = f"Invalid path: '{file_or_dir}'"
            _LOGGER.error(message)
            raise IOError(message)

        with open(target_path, 'r', encoding='utf-8') as f:
            saved_data = json.load(f)

        saved_class_name = saved_data[PytorchModelKeys.MODEL]
        config = saved_data[PytorchModelKeys.CONFIG]

        # Guard against instantiating a different architecture than the one saved.
        if saved_class_name != cls.__name__:
            message = f"Model class mismatch. File specifies '{saved_class_name}', but '{cls.__name__}' was expected."
            _LOGGER.error(message)
            raise ValueError(message)

        model = cls(**config)
        if verbose:
            _LOGGER.info(f"Successfully loaded architecture for '{saved_class_name}'")
        return model
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class _BaseMLP(nn.Module, _ArchitectureHandlerMixin):
|
|
22
75
|
"""
|
|
23
76
|
A base class for Multilayer Perceptrons.
|
|
24
77
|
|
|
@@ -68,7 +121,7 @@ class _BaseMLP(nn.Module):
|
|
|
68
121
|
# Set a customizable Prediction Head for flexibility, specially in transfer learning and fine-tuning
|
|
69
122
|
self.output_layer = nn.Linear(current_features, out_targets)
|
|
70
123
|
|
|
71
|
-
def
|
|
124
|
+
def get_architecture_config(self) -> Dict[str, Any]:
|
|
72
125
|
"""Returns the base configuration of the model."""
|
|
73
126
|
return {
|
|
74
127
|
'in_features': self.in_features,
|
|
@@ -228,9 +281,9 @@ class MultiHeadAttentionMLP(_BaseMLP):
|
|
|
228
281
|
|
|
229
282
|
return logits, attention_weights
|
|
230
283
|
|
|
231
|
-
def
|
|
284
|
+
def get_architecture_config(self) -> Dict[str, Any]:
|
|
232
285
|
"""Returns the full configuration of the model."""
|
|
233
|
-
config = super().get_config()
|
|
286
|
+
config = super().get_architecture_config()
|
|
234
287
|
config['num_heads'] = self.num_heads
|
|
235
288
|
config['attention_dropout'] = self.attention_dropout
|
|
236
289
|
return config
|
|
@@ -247,7 +300,7 @@ class MultiHeadAttentionMLP(_BaseMLP):
|
|
|
247
300
|
return f"MultiHeadAttentionMLP(arch: {arch_str})"
|
|
248
301
|
|
|
249
302
|
|
|
250
|
-
class TabularTransformer(nn.Module):
|
|
303
|
+
class TabularTransformer(nn.Module, _ArchitectureHandlerMixin):
|
|
251
304
|
"""
|
|
252
305
|
A Transformer-based model for tabular data tasks.
|
|
253
306
|
|
|
@@ -357,7 +410,7 @@ class TabularTransformer(nn.Module):
|
|
|
357
410
|
|
|
358
411
|
return logits
|
|
359
412
|
|
|
360
|
-
def
|
|
413
|
+
def get_architecture_config(self) -> Dict[str, Any]:
|
|
361
414
|
"""Returns the full configuration of the model."""
|
|
362
415
|
return {
|
|
363
416
|
'out_targets': self.out_targets,
|
|
@@ -529,7 +582,7 @@ class _MultiHeadAttentionLayer(nn.Module):
|
|
|
529
582
|
return out, attn_weights.squeeze()
|
|
530
583
|
|
|
531
584
|
|
|
532
|
-
class SequencePredictorLSTM(nn.Module):
|
|
585
|
+
class SequencePredictorLSTM(nn.Module, _ArchitectureHandlerMixin):
|
|
533
586
|
"""
|
|
534
587
|
A simple LSTM-based network for sequence-to-sequence prediction tasks.
|
|
535
588
|
|
|
@@ -597,7 +650,7 @@ class SequencePredictorLSTM(nn.Module):
|
|
|
597
650
|
|
|
598
651
|
return predictions
|
|
599
652
|
|
|
600
|
-
def
|
|
653
|
+
def get_architecture_config(self) -> dict:
|
|
601
654
|
"""Returns the configuration of the model."""
|
|
602
655
|
return {
|
|
603
656
|
'features': self.features,
|
|
@@ -615,76 +668,5 @@ class SequencePredictorLSTM(nn.Module):
|
|
|
615
668
|
)
|
|
616
669
|
|
|
617
670
|
|
|
618
|
-
def save_architecture(model: nn.Module, directory: Union[str, Path], verbose: bool=True):
|
|
619
|
-
"""
|
|
620
|
-
Saves a model's architecture to a 'architecture.json' file.
|
|
621
|
-
|
|
622
|
-
This function relies on the model having a `get_config()` method that
|
|
623
|
-
returns a dictionary of the arguments needed to initialize it.
|
|
624
|
-
|
|
625
|
-
Args:
|
|
626
|
-
model (nn.Module): The PyTorch model instance to save.
|
|
627
|
-
directory (str | Path): The directory to save the JSON file.
|
|
628
|
-
|
|
629
|
-
Raises:
|
|
630
|
-
AttributeError: If the model does not have a `get_config()` method.
|
|
631
|
-
"""
|
|
632
|
-
if not hasattr(model, 'get_config'):
|
|
633
|
-
_LOGGER.error(f"Model '{model.__class__.__name__}' does not have a 'get_config()' method.")
|
|
634
|
-
raise AttributeError()
|
|
635
|
-
|
|
636
|
-
# Ensure the target directory exists
|
|
637
|
-
path_dir = make_fullpath(directory, make=True, enforce="directory")
|
|
638
|
-
full_path = path_dir / "architecture.json"
|
|
639
|
-
|
|
640
|
-
config = {
|
|
641
|
-
'model_class': model.__class__.__name__,
|
|
642
|
-
'config': model.get_config() # type: ignore
|
|
643
|
-
}
|
|
644
|
-
|
|
645
|
-
with open(full_path, 'w') as f:
|
|
646
|
-
json.dump(config, f, indent=4)
|
|
647
|
-
|
|
648
|
-
if verbose:
|
|
649
|
-
_LOGGER.info(f"Architecture for '{model.__class__.__name__}' saved to '{path_dir.name}'")
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
def load_architecture(filepath: Union[str, Path], expected_model_class: type, verbose: bool=True) -> nn.Module:
|
|
653
|
-
"""
|
|
654
|
-
Loads a model architecture from a JSON file.
|
|
655
|
-
|
|
656
|
-
This function instantiates a model by providing an explicit class to use
|
|
657
|
-
and checking that it matches the class name specified in the file.
|
|
658
|
-
|
|
659
|
-
Args:
|
|
660
|
-
filepath (Union[str, Path]): The path of the JSON architecture file.
|
|
661
|
-
expected_model_class (type): The model class expected to load (e.g., MultilayerPerceptron).
|
|
662
|
-
|
|
663
|
-
Returns:
|
|
664
|
-
nn.Module: An instance of the model with a freshly initialized state.
|
|
665
|
-
|
|
666
|
-
Raises:
|
|
667
|
-
FileNotFoundError: If the filepath does not exist.
|
|
668
|
-
ValueError: If the class name in the file does not match the `expected_model_class`.
|
|
669
|
-
"""
|
|
670
|
-
path_obj = make_fullpath(filepath, enforce="file")
|
|
671
|
-
|
|
672
|
-
with open(path_obj, 'r') as f:
|
|
673
|
-
saved_data = json.load(f)
|
|
674
|
-
|
|
675
|
-
saved_class_name = saved_data['model_class']
|
|
676
|
-
config = saved_data['config']
|
|
677
|
-
|
|
678
|
-
if saved_class_name != expected_model_class.__name__:
|
|
679
|
-
_LOGGER.error(f"Model class mismatch. File specifies '{saved_class_name}', but '{expected_model_class.__name__}' was expected.")
|
|
680
|
-
raise ValueError()
|
|
681
|
-
|
|
682
|
-
# Create an instance of the model using the provided class and config
|
|
683
|
-
model = expected_model_class(**config)
|
|
684
|
-
if verbose:
|
|
685
|
-
_LOGGER.info(f"Successfully loaded architecture for '{saved_class_name}'")
|
|
686
|
-
return model
|
|
687
|
-
|
|
688
|
-
|
|
689
671
|
def info():
|
|
690
672
|
_script_info(__all__)
|
|
@@ -21,6 +21,7 @@ __all__ = [
|
|
|
21
21
|
"show_null_columns",
|
|
22
22
|
"drop_columns_with_missing_data",
|
|
23
23
|
"drop_macro",
|
|
24
|
+
"clean_column_names",
|
|
24
25
|
"split_features_targets",
|
|
25
26
|
"split_continuous_binary",
|
|
26
27
|
"plot_correlation_heatmap",
|
|
@@ -300,6 +301,42 @@ def drop_macro(df: pd.DataFrame,
|
|
|
300
301
|
return df_clean
|
|
301
302
|
|
|
302
303
|
|
|
304
|
+
def clean_column_names(df: pd.DataFrame, replacement_char: str = '-', replacement_pattern: str = r'[\[\]{}<>,:"]', verbose: bool = True) -> pd.DataFrame:
    """
    Cleans DataFrame column names by replacing special characters.

    This function is useful for ensuring compatibility with libraries like LightGBM,
    which do not support special JSON characters such as `[]{}<>,:"` in feature names.

    Only string labels are rewritten. Non-string labels (integers, tuples, NaN, ...)
    are passed through unchanged instead of being turned into NaN or raising.

    Note:
        Distinct names can collapse to the same cleaned name
        (e.g. `a[` and `a]` both become `a-`); this is not checked.

    Args:
        df (pd.DataFrame): The input DataFrame. It is not modified.
        replacement_char (str): The character to use for replacing characters.
        replacement_pattern (str): Regex pattern to use for the replacement logic.
        verbose (bool): If True, prints the renamed columns.

    Returns:
        pd.DataFrame: A new DataFrame with cleaned column names.
    """
    import re  # local import: keeps the block self-contained

    compiled_pattern = re.compile(replacement_pattern)

    def _clean(label):
        # Leave anything that is not text exactly as it was.
        if isinstance(label, str):
            return compiled_pattern.sub(replacement_char, label)
        return label

    new_df = df.copy()

    original_columns = new_df.columns
    # Index.map keeps the index name and works for mixed-type or empty indexes.
    new_columns = original_columns.map(_clean)

    # Create a map of changes for logging (string labels only: NaN != NaN would
    # otherwise be reported as a rename).
    rename_map = {
        old: new
        for old, new in zip(original_columns, new_columns)
        if isinstance(old, str) and old != new
    }

    if verbose:
        if rename_map:
            _LOGGER.info(f"Cleaned {len(rename_map)} column name(s) containing special characters:")
            for old, new in rename_map.items():
                print(f"    '{old}' -> '{new}'")
        else:
            _LOGGER.info("No column names required cleaning.")

    new_df.columns = new_columns
    return new_df
|
|
338
|
+
|
|
339
|
+
|
|
303
340
|
def split_features_targets(df: pd.DataFrame, targets: list[str]):
|
|
304
341
|
"""
|
|
305
342
|
Splits a DataFrame's columns into features and targets.
|
|
@@ -38,6 +38,13 @@ class PyTorchInferenceKeys:
|
|
|
38
38
|
PROBABILITIES = "probabilities"
|
|
39
39
|
|
|
40
40
|
|
|
41
|
+
class PytorchModelKeys:
    """
    Keys for saving and loading models.

    All values are plain strings. They are used as JSON object keys and as a
    file name, so none of them may be a tuple.
    """
    # NOTE: no trailing commas here - `NAME = "x",` would create the 1-tuple
    # ("x",), which json.dump rejects as a dictionary key.
    # JSON key holding the model's class name.
    MODEL = "model_class"
    # JSON key holding the constructor keyword arguments.
    CONFIG = "config"
    # File name of the saved architecture JSON.
    SAVENAME = "architecture.json"
|
|
46
|
+
|
|
47
|
+
|
|
41
48
|
class _OneHotOtherPlaceholder:
|
|
42
49
|
"""Used internally by GUI_tools."""
|
|
43
50
|
OTHER_GUI = "OTHER"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{dragon_ml_toolbox-10.4.2 → dragon_ml_toolbox-10.6.0}/dragon_ml_toolbox.egg-info/SOURCES.txt
RENAMED
|
File without changes
|
|
File without changes
|
{dragon_ml_toolbox-10.4.2 → dragon_ml_toolbox-10.6.0}/dragon_ml_toolbox.egg-info/requires.txt
RENAMED
|
File without changes
|
{dragon_ml_toolbox-10.4.2 → dragon_ml_toolbox-10.6.0}/dragon_ml_toolbox.egg-info/top_level.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|