dragon-ml-toolbox 12.4.0__py3-none-any.whl → 12.6.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the registry.
- {dragon_ml_toolbox-12.4.0.dist-info → dragon_ml_toolbox-12.6.0.dist-info}/METADATA +1 -1
- {dragon_ml_toolbox-12.4.0.dist-info → dragon_ml_toolbox-12.6.0.dist-info}/RECORD +16 -16
- ml_tools/ETL_cleaning.py +4 -4
- ml_tools/ETL_engineering.py +2 -2
- ml_tools/MICE_imputation.py +2 -2
- ml_tools/ML_optimization.py +2 -2
- ml_tools/ML_simple_optimization.py +2 -2
- ml_tools/VIF_factor.py +2 -2
- ml_tools/data_exploration.py +3 -3
- ml_tools/ensemble_learning.py +2 -2
- ml_tools/serde.py +37 -1
- ml_tools/utilities.py +8 -8
- {dragon_ml_toolbox-12.4.0.dist-info → dragon_ml_toolbox-12.6.0.dist-info}/WHEEL +0 -0
- {dragon_ml_toolbox-12.4.0.dist-info → dragon_ml_toolbox-12.6.0.dist-info}/licenses/LICENSE +0 -0
- {dragon_ml_toolbox-12.4.0.dist-info → dragon_ml_toolbox-12.6.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +0 -0
- {dragon_ml_toolbox-12.4.0.dist-info → dragon_ml_toolbox-12.6.0.dist-info}/top_level.txt +0 -0
{dragon_ml_toolbox-12.4.0.dist-info → dragon_ml_toolbox-12.6.0.dist-info}/RECORD
CHANGED

```diff
@@ -1,41 +1,41 @@
-dragon_ml_toolbox-12.
-dragon_ml_toolbox-12.
-ml_tools/ETL_cleaning.py,sha256=
-ml_tools/ETL_engineering.py,sha256=
+dragon_ml_toolbox-12.6.0.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
+dragon_ml_toolbox-12.6.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=iy2r_R7wjzsCbz_Q_jMsp_jfZ6oP8XW9QhwzRBH0mGY,1904
+ml_tools/ETL_cleaning.py,sha256=2VBRllV8F-ZiPylPp8Az2gwn5ztgazN0BH5OKnRUhV0,20402
+ml_tools/ETL_engineering.py,sha256=KfYqgsxupAx6e_TxwO1LZXeu5mFkIhVXJrNjP3CzIZc,54927
 ml_tools/GUI_tools.py,sha256=Va6ig-dHULPVRwQYYtH3fvY5XPIoqRcJpRW8oXC55Hw,45413
-ml_tools/MICE_imputation.py,sha256=
+ml_tools/MICE_imputation.py,sha256=X273Qlgoqqg7KTmoKd75YDyAPB0UIbTzGP3xsCmRh3E,11717
 ml_tools/ML_callbacks.py,sha256=-XRIZEy3CPJWTHcoReyIw53FZlTs3pWcTVVnncTQQSc,13909
 ml_tools/ML_datasetmaster.py,sha256=t6q6mU9lz2rYKTVPKjA7yZ5ImV7_NykiciHaYnqIEpA,30822
 ml_tools/ML_evaluation.py,sha256=tLswOPgH4G1KExSMn0876YtNkbxPh-W3J4MYOjomMWA,16208
 ml_tools/ML_evaluation_multi.py,sha256=6OZyQ4SM9ALh38mOABmiHgIQDWcovsD_iOo7Bg9YZCE,12516
 ml_tools/ML_inference.py,sha256=ymFvncFsU10PExq87xnEj541DKV5ck0nMuK8ToJHzVQ,23067
 ml_tools/ML_models.py,sha256=pSCV6KbmVnPZr49Kbyg7g25CYaWBWJr6IinBHKgVKGw,28042
-ml_tools/ML_optimization.py,sha256
+ml_tools/ML_optimization.py,sha256=es3TlQbY7RYgJMZnznkjYGbUxFnAqzZxE_g3_qLK9Q8,22960
 ml_tools/ML_scaler.py,sha256=tw6onj9o8_kk3FQYb930HUzvv1zsFZe2YZJdF3LtHkU,7538
-ml_tools/ML_simple_optimization.py,sha256=
+ml_tools/ML_simple_optimization.py,sha256=W2mce1XFCuiOHTOjOsCNbETISHn5MwYlYsTIXH5hMMo,18177
 ml_tools/ML_trainer.py,sha256=_g48w5Ak-wQr5fGHdJqlcpnzv3gWyL1ghkOhy9VOZbo,23930
 ml_tools/ML_utilities.py,sha256=35DfZzAwfDwVwfRECD8X_2ynsU2NCpTdNJSmza6oAzQ,8712
 ml_tools/PSO_optimization.py,sha256=fVHeemqilBS0zrGV25E5yKwDlGdd2ZKa18d8CZ6Q6Fk,22961
 ml_tools/RNN_forecast.py,sha256=Qa2KoZfdAvSjZ4yE78N4BFXtr3tTr0Gx7tQJZPotsh0,1967
 ml_tools/SQL.py,sha256=vXLPGfVVg8bfkbBE3HVfyEclVbdJy0TBhuQONtMwSCQ,11234
-ml_tools/VIF_factor.py,sha256=
+ml_tools/VIF_factor.py,sha256=at5IVqPvicja2-DNSTSIIy3SkzDWCmLzo3qTG_qr5n8,10422
 ml_tools/__init__.py,sha256=q0y9faQ6e17XCQ7eUiCZ1FJ4Bg5EQqLjZ9f_l5REUUY,41
 ml_tools/_logger.py,sha256=dlp5cGbzooK9YSNSZYB4yjZrOaQUGW8PTrM411AOvL8,4717
 ml_tools/_script_info.py,sha256=21r83LV3RubsNZ_RTEUON6RbDf7Mh4_udweNcvdF_Fk,212
 ml_tools/constants.py,sha256=3br5Rk9cL2IUo638eJuMOGdbGQaWssaUecYEvSeRBLM,3322
 ml_tools/custom_logger.py,sha256=xot-VeZFigKjcVxADgzvI54vZO_MqMMejo7JmDED8Xo,5892
-ml_tools/data_exploration.py,sha256=
+ml_tools/data_exploration.py,sha256=xvuWHrxk5HOtY7_WmtXEUNf1d_TUxuNPFPFyN7QJxCY,46836
 ml_tools/ensemble_evaluation.py,sha256=FGHSe8LBI8_w8LjNeJWOcYQ1UK_mc6fVah8gmSvNVGg,26853
 ml_tools/ensemble_inference.py,sha256=0yLmLNj45RVVoSCLH1ZYJG9IoAhTkWUqEZmLOQTFGTY,9348
-ml_tools/ensemble_learning.py,sha256=
+ml_tools/ensemble_learning.py,sha256=vsIED7nlheYI4w2SBzP6SC1AnNeMfn-2A1Gqw5EfxsM,21964
 ml_tools/handle_excel.py,sha256=pfdAPb9ywegFkM9T54bRssDOsX-K7rSeV0RaMz7lEAo,14006
 ml_tools/keys.py,sha256=FDpbS3Jb0pjrVvvp2_8nZi919mbob_-xwuy5OOtKM_A,1848
 ml_tools/math_utilities.py,sha256=PxoOrnuj6Ntp7_TJqyDWi0JX03WpAO5iaFNK2Oeq5I4,8800
 ml_tools/optimization_tools.py,sha256=ewYMAdSGlFxYALAGFXn-MsHpvW_Sbx6I-sKg9Kp6rB8,13533
 ml_tools/path_manager.py,sha256=CyDU16pOKmC82jPubqJPT6EBt-u-3rGVbxyPIZCvDDY,18432
-ml_tools/serde.py,sha256=
-ml_tools/utilities.py,sha256=
-dragon_ml_toolbox-12.
-dragon_ml_toolbox-12.
-dragon_ml_toolbox-12.
-dragon_ml_toolbox-12.
+ml_tools/serde.py,sha256=UIshIesHRFmxr8F6B3LxGG8bYc1HHK-nlE3kENSZL18,5288
+ml_tools/utilities.py,sha256=_uCYOo5f6Zp6wxabrHwDfjvaipqziD7BWSdKhjl2VGU,13923
+dragon_ml_toolbox-12.6.0.dist-info/METADATA,sha256=Bg2mEGcvqeyB4Pr-M3ETjAM3GOwr7FBH0s57XpuBij0,6166
+dragon_ml_toolbox-12.6.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+dragon_ml_toolbox-12.6.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
+dragon_ml_toolbox-12.6.0.dist-info/RECORD,,
```
ml_tools/ETL_cleaning.py
CHANGED
```diff
@@ -5,7 +5,7 @@ from typing import Union, List, Dict
 
 from .path_manager import sanitize_filename, make_fullpath
 from .data_exploration import drop_macro
-from .utilities import
+from .utilities import save_dataframe_filename, load_dataframe
 from ._script_info import _script_info
 from ._logger import _LOGGER
 
@@ -263,7 +263,7 @@ def basic_clean(input_filepath: Union[str,Path], output_filepath: Union[str,Path
     df_final = _cleaner_core(df_in=df, all_lowercase=all_lowercase)
 
     # Save cleaned dataframe
-
+    save_dataframe_filename(df=df_final, save_dir=output_path.parent, filename=output_path.name)
 
     _LOGGER.info(f"Data successfully cleaned.")
 
@@ -329,7 +329,7 @@ def basic_clean_drop(input_filepath: Union[str,Path], output_filepath: Union[str
                      threshold=threshold)
 
     # Save cleaned dataframe
-
+    save_dataframe_filename(df=df_final, save_dir=output_path.parent, filename=output_path.name)
 
     _LOGGER.info(f"Data successfully cleaned.")
 
@@ -494,7 +494,7 @@ class DataFrameCleaner:
         if isinstance(output_filepath, str):
            output_filepath = make_fullpath(input_path=output_filepath, enforce="file")
 
-
+        save_dataframe_filename(df=df_clean, save_dir=output_filepath.parent, filename=output_filepath.name)
 
        return None
```
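These hunks swap the old saver for the renamed `save_dataframe_filename` at every call site; the same rename recurs in the file sections below. A minimal sketch of the new call pattern, assuming the `save_dataframe_filename(df, save_dir, filename)` signature shown in the utilities.py section; the DataFrame and paths here are illustrative:

```python
from pathlib import Path

import pandas as pd

from ml_tools.utilities import save_dataframe_filename

# Illustrative data and destination; a polars DataFrame would also be accepted.
df_final = pd.DataFrame({"feature": [1.0, 2.0], "target": [0, 1]})
output_path = Path("data/cleaned_dataset.csv")

# The pattern used throughout this release: split a full path into the
# directory and file name that save_dataframe_filename expects.
save_dataframe_filename(df=df_final, save_dir=output_path.parent, filename=output_path.name)
```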
ml_tools/ETL_engineering.py
CHANGED
```diff
@@ -3,7 +3,7 @@ import re
 from pathlib import Path
 from typing import Literal, Union, Optional, Any, Callable, List, Dict, Tuple
 
-from .utilities import load_dataframe,
+from .utilities import load_dataframe, save_dataframe_filename
 from .path_manager import make_fullpath
 from ._script_info import _script_info
 from ._logger import _LOGGER
 
@@ -230,7 +230,7 @@ class DataProcessor:
         df_processed = self.transform(df)
 
         # save processed df
-
+        save_dataframe_filename(df=df_processed, save_dir=out_path.parent, filename=out_path.name)
 
     def __str__(self) -> str:
         """
```
ml_tools/MICE_imputation.py
CHANGED
```diff
@@ -6,7 +6,7 @@ import numpy as np
 from plotnine import ggplot, labs, theme, element_blank # type: ignore
 from typing import Optional, Union
 
-from .utilities import load_dataframe, merge_dataframes,
+from .utilities import load_dataframe, merge_dataframes, save_dataframe_filename
 from .math_utilities import threshold_binary_values
 from .path_manager import sanitize_filename, make_fullpath, list_csv_paths
 from ._logger import _LOGGER
 
@@ -75,7 +75,7 @@ def apply_mice(df: pd.DataFrame, df_name: str, binary_columns: Optional[list[str
 def save_imputed_datasets(save_dir: Union[str, Path], imputed_datasets: list, df_targets: pd.DataFrame, imputed_dataset_names: list[str]):
     for imputed_df, subname in zip(imputed_datasets, imputed_dataset_names):
         merged_df = merge_dataframes(imputed_df, df_targets, direction="horizontal", verbose=False)
-
+        save_dataframe_filename(df=merged_df, save_dir=save_dir, filename=subname)
 
 
     #Get names of features that had missing values before imputation
```
ml_tools/ML_optimization.py
CHANGED
```diff
@@ -18,7 +18,7 @@ from .ML_inference import PyTorchInferenceHandler
 from .keys import PyTorchInferenceKeys
 from .SQL import DatabaseManager
 from .optimization_tools import _save_result
-from .utilities import
+from .utilities import save_dataframe_filename
 from .math_utilities import discretize_categorical_values
 
 
@@ -513,7 +513,7 @@ def _run_single_optimization_rep(
 
 def _handle_pandas_log(logger: PandasLogger, save_path: Path, target_name: str):
     log_dataframe = logger.to_dataframe()
-
+    save_dataframe_filename(df=log_dataframe, save_dir=save_path / "EvolutionLogs", filename=target_name)
 
 
 def info():
```
ml_tools/ML_simple_optimization.py
CHANGED

```diff
@@ -18,7 +18,7 @@ from .ML_inference import PyTorchInferenceHandler
 from .keys import PyTorchInferenceKeys
 from .SQL import DatabaseManager
 from .optimization_tools import _save_result
-from .utilities import
+from .utilities import save_dataframe_filename
 from .math_utilities import threshold_binary_values
 
 """
@@ -406,7 +406,7 @@ def s_run_optimization(
 
 def _handle_pandas_log(logger: PandasLogger, save_path: Path, target_name: str):
     log_dataframe = logger.to_dataframe()
-
+    save_dataframe_filename(df=log_dataframe, save_dir=save_path / "EvolutionLogs", filename=target_name)
 
 
 def info():
```
ml_tools/VIF_factor.py
CHANGED
```diff
@@ -7,7 +7,7 @@ from statsmodels.tools.tools import add_constant
 import warnings
 from pathlib import Path
 
-from .utilities import yield_dataframes_from_dir,
+from .utilities import yield_dataframes_from_dir, save_dataframe_filename
 from .path_manager import sanitize_filename, make_fullpath
 from ._logger import _LOGGER
 from ._script_info import _script_info
 
@@ -229,7 +229,7 @@ def compute_vif_multi(input_directory: Union[str, Path],
         result_df, dropped_cols = drop_vif_based(df=df, vif_df=vif_dataframe)
 
         if len(dropped_cols) > 0:
-
+            save_dataframe_filename(df=result_df, save_dir=output_dataset_path, filename=new_filename)
 
 
 def info():
```
ml_tools/data_exploration.py
CHANGED
```diff
@@ -10,7 +10,7 @@ import re
 from .path_manager import sanitize_filename, make_fullpath
 from ._script_info import _script_info
 from ._logger import _LOGGER
-from .utilities import
+from .utilities import save_dataframe_filename
 
 
 # Keep track of all available tools, show using `info()`
@@ -269,7 +269,7 @@ def drop_macro(df: pd.DataFrame,
 
     # Log initial state
     missing_data = show_null_columns(df=df_clean)
-
+    save_dataframe_filename(df=missing_data.reset_index(drop=False),
                             save_dir=log_directory,
                             filename="Missing_Data_start")
 
@@ -298,7 +298,7 @@ def drop_macro(df: pd.DataFrame,
 
     # log final state
     missing_data = show_null_columns(df=df_clean)
-
+    save_dataframe_filename(df=missing_data.reset_index(drop=False),
                             save_dir=log_directory,
                             filename="Missing_Data_final")
 
```
ml_tools/ensemble_learning.py
CHANGED
```diff
@@ -14,7 +14,7 @@ from sklearn.model_selection import train_test_split
 from sklearn.base import clone
 
 from .utilities import yield_dataframes_from_dir, train_dataset_yielder
-from .serde import
+from .serde import serialize_object_filename
 from .path_manager import sanitize_filename, make_fullpath
 from ._script_info import _script_info
 from .keys import EnsembleKeys
 
@@ -411,7 +411,7 @@ def _save_model(trained_model, model_name: str, target_name:str, feature_names:
                EnsembleKeys.FEATURES: feature_names,
                EnsembleKeys.TARGET: target_name}
 
-
+    serialize_object_filename(obj=to_save, save_dir=save_directory, filename=filename, verbose=False, raise_on_error=True)
 
 
 # TRAIN EVALUATE PIPELINE
```
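For context, a hypothetical reconstruction of the save step that `_save_model` now performs with the renamed serializer. The start of the `to_save` dict is not visible in this hunk, so the payload below is only partly grounded; the feature and target values are illustrative:

```python
from ml_tools.serde import serialize_object_filename
from ml_tools.keys import EnsembleKeys

# Only the FEATURES and TARGET entries appear in the diff; the rest of the
# payload (e.g. the trained model entry) is elided above.
to_save = {
    EnsembleKeys.FEATURES: ["age", "income"],  # illustrative feature names
    EnsembleKeys.TARGET: "label",              # illustrative target name
}

# Same keyword arguments as the updated call site in _save_model.
serialize_object_filename(obj=to_save, save_dir="models", filename="rf_label",
                          verbose=False, raise_on_error=True)
```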
ml_tools/serde.py
CHANGED
```diff
@@ -9,12 +9,13 @@ from ._logger import _LOGGER
 
 
 __all__ = [
+    "serialize_object_filename",
     "serialize_object",
     "deserialize_object",
 ]
 
 
-def
+def serialize_object_filename(obj: Any, save_dir: Union[str,Path], filename: str, verbose: bool=True, raise_on_error: bool=False) -> None:
     """
     Serializes a Python object using joblib; suitable for Python built-ins, numpy, and pandas.
 
@@ -40,6 +41,41 @@ def serialize_object(obj: Any, save_dir: Union[str,Path], filename: str, verbose
         _LOGGER.info(f"Object of type '{type(obj)}' saved to '{full_path}'")
     return None
 
+
+def serialize_object(obj: Any, file_path: Path, verbose: bool = True, raise_on_error: bool = False) -> None:
+    """
+    Serializes a Python object using joblib to a specific file path.
+
+    Suitable for Python built-ins, numpy, and pandas.
+
+    Parameters:
+        obj (Any) : The Python object to serialize.
+        file_path (Path) : The full file path to save the object to.
+            '.joblib' extension will be appended if missing.
+        raise_on_error (bool) : If True, raises exceptions on failure.
+    """
+    try:
+        # Ensure the extension is correct
+        if file_path.suffix != '.joblib':
+            file_path = file_path.with_suffix(file_path.suffix + '.joblib')
+
+        # Ensure the parent directory exists
+        _save_dir = make_fullpath(file_path.parent, make=True, enforce="directory")
+
+        # Dump the object
+        joblib.dump(obj, file_path)
+
+    except (IOError, OSError, TypeError, TerminatedWorkerError) as e:
+        _LOGGER.error(f"Failed to serialize object of type '{type(obj)}' to '{file_path}'. Error: {e}")
+        if raise_on_error:
+            raise e
+        return None
+    else:
+        if verbose:
+            _LOGGER.info(f"Object of type '{type(obj)}' saved to '{file_path}'")
+        return None
+
+
 # Define a TypeVar to link the expected type to the return type of deserialization
 T = TypeVar('T')
 
```
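The upshot of this section: the old directory-plus-filename serializer keeps its behavior under the new name `serialize_object_filename`, while `serialize_object` becomes a new `Path`-based variant. A short usage sketch with illustrative paths:

```python
from pathlib import Path

from ml_tools.serde import serialize_object, serialize_object_filename

payload = {"weights": [0.1, 0.2, 0.3], "bias": -0.5}

# Renamed directory + filename variant (the pre-existing behavior):
serialize_object_filename(obj=payload, save_dir="artifacts", filename="payload")

# New full-path variant added in 12.6.0; appends '.joblib' when the suffix
# is missing, so this also writes 'artifacts/payload.joblib'.
serialize_object(obj=payload, file_path=Path("artifacts/payload"))
```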
ml_tools/utilities.py
CHANGED
```diff
@@ -14,8 +14,8 @@ __all__ = [
     "load_dataframe",
     "yield_dataframes_from_dir",
     "merge_dataframes",
+    "save_dataframe_filename",
     "save_dataframe",
-    "save_dataframe_path",
     "distribute_dataset_by_target",
     "train_dataset_orchestrator",
     "train_dataset_yielder"
@@ -210,7 +210,7 @@ def merge_dataframes(
     return merged_df
 
 
-def
+def save_dataframe_filename(df: Union[pd.DataFrame, pl.DataFrame], save_dir: Union[str,Path], filename: str) -> None:
     """
     Saves a pandas or polars DataFrame to a CSV file.
 
@@ -250,11 +250,11 @@ def save_dataframe(df: Union[pd.DataFrame, pl.DataFrame], save_dir: Union[str,Pa
     _LOGGER.info(f"Saved dataset: '{filename}' with shape: {df.shape}")
 
 
-def
+def save_dataframe(df: Union[pd.DataFrame, pl.DataFrame], full_path: Path):
     """
     Saves a DataFrame to a specified full path.
 
-    This function is a
+    This function is a wrapper for `save_dataframe_filename()`. It takes a
     single `pathlib.Path` object pointing to a `.csv` file.
 
     Args:
@@ -265,9 +265,9 @@ def save_dataframe_path(df: Union[pd.DataFrame, pl.DataFrame], full_path: Path):
         _LOGGER.error('A path object pointing to a .csv file must be provided.')
         raise ValueError()
 
-
-
-
+    save_dataframe_filename(df=df,
+                            save_dir=full_path.parent,
+                            filename=full_path.name)
 
 
 def distribute_dataset_by_target(
@@ -351,7 +351,7 @@ def train_dataset_orchestrator(list_of_dirs: list[Union[str,Path]],
                 filename = df_dir.name + '_' + target_name + '_' + df_name
             else:
                 filename = target_name + '_' + df_name
-
+            save_dataframe_filename(df=df, save_dir=save_dir, filename=filename)
             total_saved += 1
         except Exception as e:
             _LOGGER.error(f"Failed to process file '{df_path}'. Reason: {e}")
```
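The DataFrame savers now mirror the serde split: `save_dataframe_filename` is the old directory-plus-filename function under a new name, and `save_dataframe` becomes a thin `Path`-based wrapper that must point at a `.csv` file. A usage sketch with illustrative data and paths:

```python
from pathlib import Path

import pandas as pd

from ml_tools.utilities import save_dataframe, save_dataframe_filename

df = pd.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]})

# Directory + filename variant (formerly named save_dataframe):
save_dataframe_filename(df=df, save_dir="output", filename="dataset.csv")

# Path-based wrapper added in this release; per the hunk above it raises
# ValueError unless the path points to a .csv file.
save_dataframe(df=df, full_path=Path("output/dataset.csv"))
```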
Files without changes: WHEEL, licenses/LICENSE, licenses/LICENSE-THIRD-PARTY.md, top_level.txt (only the containing dist-info directory was renamed).