dragon-ml-toolbox 3.3.0__tar.gz → 3.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dragon-ml-toolbox might be problematic. Click here for more details.
- {dragon_ml_toolbox-3.3.0/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-3.4.0}/PKG-INFO +1 -1
- {dragon_ml_toolbox-3.3.0 → dragon_ml_toolbox-3.4.0/dragon_ml_toolbox.egg-info}/PKG-INFO +1 -1
- {dragon_ml_toolbox-3.3.0 → dragon_ml_toolbox-3.4.0}/ml_tools/VIF_factor.py +2 -2
- {dragon_ml_toolbox-3.3.0 → dragon_ml_toolbox-3.4.0}/ml_tools/utilities.py +52 -6
- {dragon_ml_toolbox-3.3.0 → dragon_ml_toolbox-3.4.0}/pyproject.toml +1 -1
- {dragon_ml_toolbox-3.3.0 → dragon_ml_toolbox-3.4.0}/LICENSE +0 -0
- {dragon_ml_toolbox-3.3.0 → dragon_ml_toolbox-3.4.0}/LICENSE-THIRD-PARTY.md +0 -0
- {dragon_ml_toolbox-3.3.0 → dragon_ml_toolbox-3.4.0}/README.md +0 -0
- {dragon_ml_toolbox-3.3.0 → dragon_ml_toolbox-3.4.0}/dragon_ml_toolbox.egg-info/SOURCES.txt +0 -0
- {dragon_ml_toolbox-3.3.0 → dragon_ml_toolbox-3.4.0}/dragon_ml_toolbox.egg-info/dependency_links.txt +0 -0
- {dragon_ml_toolbox-3.3.0 → dragon_ml_toolbox-3.4.0}/dragon_ml_toolbox.egg-info/requires.txt +0 -0
- {dragon_ml_toolbox-3.3.0 → dragon_ml_toolbox-3.4.0}/dragon_ml_toolbox.egg-info/top_level.txt +0 -0
- {dragon_ml_toolbox-3.3.0 → dragon_ml_toolbox-3.4.0}/ml_tools/ETL_engineering.py +0 -0
- {dragon_ml_toolbox-3.3.0 → dragon_ml_toolbox-3.4.0}/ml_tools/GUI_tools.py +0 -0
- {dragon_ml_toolbox-3.3.0 → dragon_ml_toolbox-3.4.0}/ml_tools/MICE_imputation.py +0 -0
- {dragon_ml_toolbox-3.3.0 → dragon_ml_toolbox-3.4.0}/ml_tools/ML_callbacks.py +0 -0
- {dragon_ml_toolbox-3.3.0 → dragon_ml_toolbox-3.4.0}/ml_tools/ML_evaluation.py +0 -0
- {dragon_ml_toolbox-3.3.0 → dragon_ml_toolbox-3.4.0}/ml_tools/ML_trainer.py +0 -0
- {dragon_ml_toolbox-3.3.0 → dragon_ml_toolbox-3.4.0}/ml_tools/ML_tutorial.py +0 -0
- {dragon_ml_toolbox-3.3.0 → dragon_ml_toolbox-3.4.0}/ml_tools/PSO_optimization.py +0 -0
- {dragon_ml_toolbox-3.3.0 → dragon_ml_toolbox-3.4.0}/ml_tools/RNN_forecast.py +0 -0
- {dragon_ml_toolbox-3.3.0 → dragon_ml_toolbox-3.4.0}/ml_tools/__init__.py +0 -0
- {dragon_ml_toolbox-3.3.0 → dragon_ml_toolbox-3.4.0}/ml_tools/_particle_swarm_optimization.py +0 -0
- {dragon_ml_toolbox-3.3.0 → dragon_ml_toolbox-3.4.0}/ml_tools/_pytorch_models.py +0 -0
- {dragon_ml_toolbox-3.3.0 → dragon_ml_toolbox-3.4.0}/ml_tools/data_exploration.py +0 -0
- {dragon_ml_toolbox-3.3.0 → dragon_ml_toolbox-3.4.0}/ml_tools/datasetmaster.py +0 -0
- {dragon_ml_toolbox-3.3.0 → dragon_ml_toolbox-3.4.0}/ml_tools/ensemble_learning.py +0 -0
- {dragon_ml_toolbox-3.3.0 → dragon_ml_toolbox-3.4.0}/ml_tools/handle_excel.py +0 -0
- {dragon_ml_toolbox-3.3.0 → dragon_ml_toolbox-3.4.0}/ml_tools/logger.py +0 -0
- {dragon_ml_toolbox-3.3.0 → dragon_ml_toolbox-3.4.0}/setup.cfg +0 -0
|
@@ -35,7 +35,7 @@ def compute_vif(
|
|
|
35
35
|
Args:
|
|
36
36
|
df (pd.DataFrame): The input DataFrame.
|
|
37
37
|
use_columns (list[str] | None): Optional list of columns to include. Defaults to all numeric columns.
|
|
38
|
-
ignore_columns (list[str] | None): Optional list of columns to exclude from the VIF computation. Skipped if `
|
|
38
|
+
ignore_columns (list[str] | None): Optional list of columns to exclude from the VIF computation. Skipped if `use_columns` is provided.
|
|
39
39
|
max_features_to_plot (int): Adjust the number of features shown in the plot.
|
|
40
40
|
save_dir (str | Path | None): Directory to save the plot as SVG. If None, the plot is not saved.
|
|
41
41
|
filename (str | None): Optional filename for saving the plot. Defaults to "VIF_plot.svg".
|
|
@@ -194,7 +194,7 @@ def compute_vif_multi(input_directory: Union[str, Path],
|
|
|
194
194
|
output_plot_directory (str | Path): Save plots to this directory.
|
|
195
195
|
output_dataset_directory (str | Path | None): If provided, saves new CSV files to this directory.
|
|
196
196
|
use_columns (list[str] | None): Optional list of columns to include. Defaults to all numeric columns.
|
|
197
|
-
ignore_columns (list[str] | None): Optional list of columns to exclude from the VIF computation. Skipped if `
|
|
197
|
+
ignore_columns (list[str] | None): Optional list of columns to exclude from the VIF computation. Skipped if `use_columns` is provided.
|
|
198
198
|
max_features_to_plot (int): Adjust the number of features shown in the plot.
|
|
199
199
|
fontsize (int): Base fontsize to scale title and labels on the plot.
|
|
200
200
|
|
|
@@ -24,7 +24,8 @@ __all__ = [
|
|
|
24
24
|
"threshold_binary_values_batch",
|
|
25
25
|
"serialize_object",
|
|
26
26
|
"deserialize_object",
|
|
27
|
-
"distribute_datasets_by_target"
|
|
27
|
+
"distribute_datasets_by_target",
|
|
28
|
+
"train_dataset_orchestrator"
|
|
28
29
|
]
|
|
29
30
|
|
|
30
31
|
|
|
@@ -497,7 +498,7 @@ def threshold_binary_values_batch(
|
|
|
497
498
|
return np.hstack([cont_part, bin_part])
|
|
498
499
|
|
|
499
500
|
|
|
500
|
-
def serialize_object(obj: Any, save_dir: Union[str,Path], filename: str, verbose: bool=True, raise_on_error: bool=False) ->
|
|
501
|
+
def serialize_object(obj: Any, save_dir: Union[str,Path], filename: str, verbose: bool=True, raise_on_error: bool=False) -> None:
|
|
501
502
|
"""
|
|
502
503
|
Serializes a Python object using joblib; suitable for Python built-ins, numpy, and pandas.
|
|
503
504
|
|
|
@@ -505,9 +506,6 @@ def serialize_object(obj: Any, save_dir: Union[str,Path], filename: str, verbose
|
|
|
505
506
|
obj (Any) : The Python object to serialize.
|
|
506
507
|
save_dir (str | Path) : Directory path where the serialized object will be saved.
|
|
507
508
|
filename (str) : Name for the output file, extension will be appended if needed.
|
|
508
|
-
|
|
509
|
-
Returns:
|
|
510
|
-
(Path | None) : The full file path where the object was saved if successful; otherwise, None.
|
|
511
509
|
"""
|
|
512
510
|
try:
|
|
513
511
|
save_path = make_fullpath(save_dir, make=True)
|
|
@@ -526,7 +524,7 @@ def serialize_object(obj: Any, save_dir: Union[str,Path], filename: str, verbose
|
|
|
526
524
|
else:
|
|
527
525
|
if verbose:
|
|
528
526
|
print(f"✅ Object of type '{type(obj)}' saved to '{full_path}'")
|
|
529
|
-
return
|
|
527
|
+
return None
|
|
530
528
|
|
|
531
529
|
|
|
532
530
|
def deserialize_object(filepath: Union[str,Path], verbose: bool=True, raise_on_error: bool=True) -> Optional[Any]:
|
|
@@ -597,6 +595,54 @@ def distribute_datasets_by_target(
|
|
|
597
595
|
yield target, subset
|
|
598
596
|
|
|
599
597
|
|
|
598
|
+
def train_dataset_orchestrator(list_of_dirs: list[Union[str,Path]],
|
|
599
|
+
target_columns: list[str],
|
|
600
|
+
save_dir: Union[str,Path],
|
|
601
|
+
safe_mode: bool=False):
|
|
602
|
+
"""
|
|
603
|
+
Orchestrates the creation of single-target datasets from multiple directories, each with a variable number of CSV datasets.
|
|
604
|
+
|
|
605
|
+
This function iterates through a list of directories, finds all CSV files,
|
|
606
|
+
and splits each dataframe based on the provided target columns. Each resulting
|
|
607
|
+
single-target dataframe is then saved to a specified directory.
|
|
608
|
+
|
|
609
|
+
Parameters
|
|
610
|
+
----------
|
|
611
|
+
list_of_dirs : list[str | Path]
|
|
612
|
+
A list of directory paths where the source CSV files are located.
|
|
613
|
+
target_columns : list[str]
|
|
614
|
+
A list of column names to be used as targets for splitting the datasets.
|
|
615
|
+
save_dir : str | Path
|
|
616
|
+
The directory where the newly created single-target datasets will be saved.
|
|
617
|
+
safe_mode : bool
|
|
618
|
+
If True, prefixes the saved filename with the source directory name to prevent overwriting files with the same name from different sources.
|
|
619
|
+
"""
|
|
620
|
+
all_dir_paths: list[Path] = list()
|
|
621
|
+
for dir in list_of_dirs:
|
|
622
|
+
dir_path = make_fullpath(dir)
|
|
623
|
+
if not dir_path.is_dir():
|
|
624
|
+
raise IOError(f"'{dir}' is not a directory.")
|
|
625
|
+
all_dir_paths.append(dir_path)
|
|
626
|
+
|
|
627
|
+
# main loop
|
|
628
|
+
total_saved = 0
|
|
629
|
+
for df_dir in all_dir_paths:
|
|
630
|
+
for df_name, df_path in list_csv_paths(df_dir).items():
|
|
631
|
+
try:
|
|
632
|
+
for target_name, df in distribute_datasets_by_target(df_or_path=df_path, target_columns=target_columns, verbose=False):
|
|
633
|
+
if safe_mode:
|
|
634
|
+
filename = df_dir.name + '_' + target_name + '_' + df_name
|
|
635
|
+
else:
|
|
636
|
+
filename = target_name + '_' + df_name
|
|
637
|
+
save_dataframe(df=df, save_dir=save_dir, filename=filename)
|
|
638
|
+
total_saved += 1
|
|
639
|
+
except Exception as e:
|
|
640
|
+
print(f"⚠️ Failed to process file '{df_path}'. Reason: {e}")
|
|
641
|
+
continue
|
|
642
|
+
|
|
643
|
+
print(f"{total_saved} single-target datasets were created.")
|
|
644
|
+
|
|
645
|
+
|
|
600
646
|
class LogKeys:
|
|
601
647
|
"""
|
|
602
648
|
Used for ML scripts only
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{dragon_ml_toolbox-3.3.0 → dragon_ml_toolbox-3.4.0}/dragon_ml_toolbox.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
{dragon_ml_toolbox-3.3.0 → dragon_ml_toolbox-3.4.0}/dragon_ml_toolbox.egg-info/top_level.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{dragon_ml_toolbox-3.3.0 → dragon_ml_toolbox-3.4.0}/ml_tools/_particle_swarm_optimization.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|