dragon-ml-toolbox 4.3.0__py3-none-any.whl → 4.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of dragon-ml-toolbox might be problematic.
- {dragon_ml_toolbox-4.3.0.dist-info → dragon_ml_toolbox-4.5.0.dist-info}/METADATA +1 -1
- {dragon_ml_toolbox-4.3.0.dist-info → dragon_ml_toolbox-4.5.0.dist-info}/RECORD +11 -11
- ml_tools/ETL_engineering.py +1 -1
- ml_tools/PSO_optimization.py +14 -1
- ml_tools/__init__.py +1 -0
- ml_tools/custom_logger.py +15 -5
- ml_tools/data_exploration.py +12 -6
- {dragon_ml_toolbox-4.3.0.dist-info → dragon_ml_toolbox-4.5.0.dist-info}/WHEEL +0 -0
- {dragon_ml_toolbox-4.3.0.dist-info → dragon_ml_toolbox-4.5.0.dist-info}/licenses/LICENSE +0 -0
- {dragon_ml_toolbox-4.3.0.dist-info → dragon_ml_toolbox-4.5.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +0 -0
- {dragon_ml_toolbox-4.3.0.dist-info → dragon_ml_toolbox-4.5.0.dist-info}/top_level.txt +0 -0
{dragon_ml_toolbox-4.3.0.dist-info → dragon_ml_toolbox-4.5.0.dist-info}/RECORD
CHANGED

@@ -1,22 +1,22 @@
-dragon_ml_toolbox-4.
-dragon_ml_toolbox-4.
-ml_tools/ETL_engineering.py,sha256=
+dragon_ml_toolbox-4.5.0.dist-info/licenses/LICENSE,sha256=2uUFNy7D0TLgHim1K5s3DIJ4q_KvxEXVilnU20cWliY,1066
+dragon_ml_toolbox-4.5.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=lY4_rJPnLnMu7YBQaY-_iz1JRDcLdQzNCyeLAF1glJY,1837
+ml_tools/ETL_engineering.py,sha256=4wwZXi9_U7xfCY70jGBaKniOeZ0m75ppxWpQBd_DmLc,39369
 ml_tools/GUI_tools.py,sha256=n4ZZ5kEjwK5rkOCFJE41HeLFfjhpJVLUSzk9Kd9Kr_0,45410
 ml_tools/MICE_imputation.py,sha256=b6ZTs8RedXFifOpuMCzr68xM16mCBVh1Ua6kcGfiVtg,11462
 ml_tools/ML_callbacks.py,sha256=0a-Rbr0Xp_B1FNopOKBBmuJ4MqazS5JgDiT7wx1dHvE,13161
 ml_tools/ML_evaluation.py,sha256=4dVqe6JF1Ukmk1sAcY8E5EG1oB1_oy2HXE5OT-pZwCs,10273
 ml_tools/ML_inference.py,sha256=Fh-X2UQn3AznWBjf-7iPSxwE-EzkGQm1VEIRUAkURmE,5336
 ml_tools/ML_trainer.py,sha256=dJjMfCEEM07Txy9KEH-2srZ3CZUa4lFWTJhpNWQ4Ndk,14974
-ml_tools/PSO_optimization.py,sha256=
+ml_tools/PSO_optimization.py,sha256=9bplCNOSe2Ozcz5yQRkbih7geuDO9UJ6dJTMSJJ8zVk,27965
 ml_tools/RNN_forecast.py,sha256=2CyjBLSYYc3xLHxwLXUmP5Qv8AmV1OB_EndETNX1IBk,1956
 ml_tools/SQL.py,sha256=9zzS6AFEJM9aj6nE31hDe8S9TqLonk-J1amwZoiHNbk,10468
 ml_tools/VIF_factor.py,sha256=2nUMupfUoogf8o6ghoFZk_OwWhFXU0R3C9Gj0HOlI14,10415
-ml_tools/__init__.py,sha256=
+ml_tools/__init__.py,sha256=q0y9faQ6e17XCQ7eUiCZ1FJ4Bg5EQqLjZ9f_l5REUUY,41
 ml_tools/_logger.py,sha256=TpgYguxO-CWYqqgLW0tqFjtwZ58PE_W2OCfWNGZr0n0,1175
 ml_tools/_pytorch_models.py,sha256=ewPPsTHgmRPzMMWwObZOdH1vxm2Ij2VWZP38NC6zSH4,10135
 ml_tools/_script_info.py,sha256=21r83LV3RubsNZ_RTEUON6RbDf7Mh4_udweNcvdF_Fk,212
-ml_tools/custom_logger.py,sha256=
-ml_tools/data_exploration.py,sha256=
+ml_tools/custom_logger.py,sha256=njM_0XPbQ1S-x5LeSQAaTo2if-XVOR_pQSGg4EDeiTU,4603
+ml_tools/data_exploration.py,sha256=qc_Oolxco2x9IhlYu5zPIuVBGiBw65HnypuGm8cQOOM,23677
 ml_tools/datasetmaster.py,sha256=_tNC2v98eCQGr3nMW_EFs83TRgRme8Uc7ttg1vosmQU,30106
 ml_tools/ensemble_inference.py,sha256=0SNX3YAz5bpvtwYmqEwqyWeIJP2Pb-v-bemENRSO7qg,9426
 ml_tools/ensemble_learning.py,sha256=Zi1oy6G2FWnTI5hBwjlexwF3JKALFS2FN6F8HAlVi_s,35391
@@ -24,7 +24,7 @@ ml_tools/handle_excel.py,sha256=J9iwIqMZemoxK49J5osSwp9Ge0h9YTKyYGbOm53hcno,1300
 ml_tools/keys.py,sha256=kK9UF-hek2VcPGFILCKl5geoN6flmMOu7IzhdEA6z5Y,1068
 ml_tools/path_manager.py,sha256=Z8e7w3MPqQaN8xmTnKuXZS6CIW59BFwwqGhGc00sdp4,13692
 ml_tools/utilities.py,sha256=mz-M351DzxWxnYVcLX-7ZQ6c-RGoCV9g4VTS9Qif2Es,18348
-dragon_ml_toolbox-4.
-dragon_ml_toolbox-4.
-dragon_ml_toolbox-4.
-dragon_ml_toolbox-4.
+dragon_ml_toolbox-4.5.0.dist-info/METADATA,sha256=PzpYHROSr85CBbNnCcS-XInzpOFhAyXbPZ5YkLaYbps,6572
+dragon_ml_toolbox-4.5.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+dragon_ml_toolbox-4.5.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
+dragon_ml_toolbox-4.5.0.dist-info/RECORD,,
ml_tools/ETL_engineering.py
CHANGED

@@ -569,7 +569,7 @@ class NumberExtractor:
         self,
         regex_pattern: str = r"(\d+\.?\d*)",
         dtype: Literal["float", "int"] = "float",
-        round_digits: Optional[int] =
+        round_digits: Optional[int] = 2,
     ):
         # --- Validation ---
         if not isinstance(regex_pattern, str):
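The only change here is that `round_digits` now defaults to `2`. A minimal sketch of what that default implies, reusing the constructor's default `regex_pattern`; the extraction step is paraphrased in plain Python rather than calling the class's actual method:

```python
import re

# Default pattern from the NumberExtractor constructor above.
pattern = r"(\d+\.?\d*)"

match = re.search(pattern, "density: 0.91735 g/cm3")
if match is not None:
    # round_digits=2: extracted floats are rounded to two decimals.
    print(round(float(match.group(1)), 2))  # 0.92
```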
ml_tools/PSO_optimization.py
CHANGED

@@ -2,7 +2,7 @@ import numpy as np
 from pathlib import Path
 import xgboost as xgb
 import lightgbm as lgb
-from typing import Literal, Union, Tuple, Dict, Optional
+from typing import Literal, Union, Tuple, Dict, Optional, Any
 import pandas as pd
 from copy import deepcopy
 from .utilities import (
@@ -25,6 +25,7 @@ from contextlib import nullcontext
 __all__ = [
     "ObjectiveFunction",
     "multiple_objective_functions_from_dir",
+    "parse_lower_upper_bounds",
     "run_pso",
     "plot_optimal_feature_distributions"
 ]
@@ -169,6 +170,18 @@ def multiple_objective_functions_from_dir(directory: Union[str,Path], add_noise:
     return objective_functions, objective_function_names
 
 
+def parse_lower_upper_bounds(source: dict[str,tuple[Any,Any]]):
+    """
+    Parse lower and upper boundaries, returning 2 lists:
+
+    `lower_bounds`, `upper_bounds`
+    """
+    lower = [low[0] for low in source.values()]
+    upper = [up[1] for up in source.values()]
+
+    return lower, upper
+
+
 def _set_boundaries(lower_boundaries: list[float], upper_boundaries: list[float]):
     assert len(lower_boundaries) == len(upper_boundaries), "Lower and upper boundaries must have the same length."
     assert len(lower_boundaries) >= 1, "At least one boundary pair is required."
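The new `parse_lower_upper_bounds` helper unpacks a per-feature bounds mapping into the two parallel lists that `_set_boundaries` expects. A usage sketch; the feature names and values are invented for illustration:

```python
from ml_tools.PSO_optimization import parse_lower_upper_bounds

# Each feature maps to a (lower, upper) tuple; dict order is preserved.
boundaries = {
    "temperature": (20.0, 95.0),
    "pressure": (1.0, 10.0),
    "flow_rate": (0.5, 3.0),
}

lower_bounds, upper_bounds = parse_lower_upper_bounds(boundaries)
# lower_bounds -> [20.0, 1.0, 0.5]
# upper_bounds -> [95.0, 10.0, 3.0]
```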
ml_tools/__init__.py
CHANGED

@@ -0,0 +1 @@
+from .custom_logger import custom_logger
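The previously empty `__init__.py` now re-exports the logger, so it is importable from the package root. A sketch of the two equivalent imports as of 4.5.0:

```python
# Still works: the full module path.
from ml_tools.custom_logger import custom_logger

# New in 4.5.0: the package-root re-export.
from ml_tools import custom_logger
```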
ml_tools/custom_logger.py
CHANGED

@@ -1,9 +1,9 @@
 from pathlib import Path
 from datetime import datetime
 from typing import Union, List, Dict, Any
-import pandas as pd
 import traceback
 import json
+import csv
 from .path_manager import sanitize_filename, make_fullpath
 from ._script_info import _script_info
 from ._logger import _LOGGER
@@ -18,7 +18,6 @@ def custom_logger(
     data: Union[
         List[Any],
         Dict[Any, Any],
-        pd.DataFrame,
         str,
         BaseException
     ],
@@ -75,7 +74,7 @@ def custom_logger(
         _log_exception_to_log(data, base_path.with_suffix(".log"))
 
     else:
-        raise ValueError("Unsupported data type. Must be list, dict,
+        raise ValueError("Unsupported data type. Must be list, dict, str, or BaseException.")
 
     _LOGGER.info(f"🗄️ Log saved to: '{base_path}'")
 
@@ -106,8 +105,19 @@ def _log_dict_to_csv(data: Dict[Any, List[Any]], path: Path) -> None:
         padded_value = value + [None] * (max_length - len(value))
         sanitized_dict[sanitized_key] = padded_value
 
-
-
+    # The `newline=''` argument is important to prevent extra blank rows
+    with open(path, 'w', newline='', encoding='utf-8') as csv_file:
+        writer = csv.writer(csv_file)
+
+        # 1. Write the header row from the sanitized dictionary keys
+        header = list(sanitized_dict.keys())
+        writer.writerow(header)
+
+        # 2. Transpose columns to rows and write them
+        # zip(*sanitized_dict.values()) elegantly converts the column data
+        # (lists in the dict) into row-by-row tuples.
+        rows_to_write = zip(*sanitized_dict.values())
+        writer.writerows(rows_to_write)
 
 
 def _log_string_to_log(data: str, path: Path) -> None:
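The rewrite drops the pandas dependency in favor of the standard-library `csv` module, which is why `pd.DataFrame` also disappears from the accepted input types. A self-contained sketch of the same column-to-row transposition, with the file name and data invented for illustration:

```python
import csv

# Column-oriented data, as _log_dict_to_csv receives it after padding.
columns = {"epoch": [1, 2, 3], "loss": [0.91, 0.55, 0.32]}

# newline='' stops the csv module from emitting blank rows on Windows.
with open("demo_log.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(columns.keys())           # header: epoch,loss
    writer.writerows(zip(*columns.values()))  # rows: (1, 0.91), (2, 0.55), ...
```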
ml_tools/data_exploration.py
CHANGED

@@ -126,7 +126,7 @@ def drop_rows_with_missing_data(df: pd.DataFrame, targets: Optional[list[str]],
     # Stage 1: Drop rows with all target columns missing
     if targets is not None:
         # validate targets
-        valid_targets =
+        valid_targets = _validate_columns(df_clean, targets)
         target_na = df_clean[valid_targets].isnull().all(axis=1)
         if target_na.any():
             print(f"🧹 Dropping {target_na.sum()} rows with all target columns missing.")
@@ -134,10 +134,10 @@ def drop_rows_with_missing_data(df: pd.DataFrame, targets: Optional[list[str]],
         else:
             print("✅ No rows with all targets missing.")
     else:
-
+        valid_targets = []
 
     # Stage 2: Drop rows based on feature column missing values
-    feature_cols = [col for col in df_clean.columns if col not in
+    feature_cols = [col for col in df_clean.columns if col not in valid_targets]
     if feature_cols:
         feature_na_frac = df_clean[feature_cols].isnull().mean(axis=1)
         rows_to_drop = feature_na_frac[feature_na_frac > threshold].index
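With `valid_targets` now always bound, stage 2 can safely exclude target columns and drop rows whose fraction of missing feature values exceeds `threshold`. A small self-contained sketch of the stage-2 arithmetic; the DataFrame and threshold are invented:

```python
import numpy as np
import pandas as pd

# Row 0 is missing 2 of 3 features (fraction ~0.67); row 1 is complete.
df_clean = pd.DataFrame({
    "f1": [np.nan, 1.0],
    "f2": [np.nan, 2.0],
    "f3": [3.0, 3.0],
})

threshold = 0.5
feature_na_frac = df_clean.isnull().mean(axis=1)  # [0.667, 0.0]
rows_to_drop = feature_na_frac[feature_na_frac > threshold].index
print(df_clean.drop(index=rows_to_drop))  # only row 1 survives
```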
@@ -238,8 +238,9 @@ def split_features_targets(df: pd.DataFrame, targets: list[str]):
     - Shape of the features dataframe.
     - Shape of the targets dataframe.
     """
-
-
+    valid_targets = _validate_columns(df, targets)
+    df_targets = df[valid_targets]
+    df_features = df.drop(columns=valid_targets)
     print(f"Original shape: {df.shape}\nFeatures shape: {df_features.shape}\nTargets shape: {df_targets.shape}")
     return df_features, df_targets
 
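The previously empty body now validates the target names, slices them off, and reports the shapes. A usage sketch with invented column names:

```python
import pandas as pd
from ml_tools.data_exploration import split_features_targets

df = pd.DataFrame({"x1": [1, 2], "x2": [3, 4], "y": [0, 1]})
df_features, df_targets = split_features_targets(df, targets=["y"])
# Original shape: (2, 3)
# Features shape: (2, 2)
# Targets shape: (2, 1)
```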
@@ -347,7 +348,7 @@ def plot_correlation_heatmap(df: pd.DataFrame,
     full_path = save_path / plot_title
 
     plt.savefig(full_path, bbox_inches="tight", format='svg')
-    print(f"Saved correlation heatmap: '{plot_title}
+    print(f"Saved correlation heatmap: '{plot_title}'")
 
     plt.show()
     plt.close()
@@ -644,5 +645,10 @@ def standardize_percentages(
     return df_copy
 
 
+def _validate_columns(df: pd.DataFrame, columns: list[str]):
+    valid_columns = [column for column in columns if column in df.columns]
+    return valid_columns
+
+
 def info():
     _script_info(__all__)
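`_validate_columns` is the shared helper behind the fixes above: it quietly keeps only the requested columns that exist in the frame instead of raising a `KeyError`. A minimal check of that behavior, with invented names:

```python
import pandas as pd

df = pd.DataFrame({"a": [1], "b": [2]})

# Same comprehension as _validate_columns in the diff.
valid = [column for column in ["a", "missing"] if column in df.columns]
assert valid == ["a"]  # "missing" is filtered out silently
```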
Files without changes:

- {dragon_ml_toolbox-4.3.0.dist-info → dragon_ml_toolbox-4.5.0.dist-info}/WHEEL
- {dragon_ml_toolbox-4.3.0.dist-info → dragon_ml_toolbox-4.5.0.dist-info}/licenses/LICENSE
- {dragon_ml_toolbox-4.3.0.dist-info → dragon_ml_toolbox-4.5.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md
- {dragon_ml_toolbox-4.3.0.dist-info → dragon_ml_toolbox-4.5.0.dist-info}/top_level.txt