dragon-ml-toolbox 12.6.0__tar.gz → 12.8.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dragon-ml-toolbox might be problematic. Click here for more details.
- {dragon_ml_toolbox-12.6.0/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-12.8.0}/PKG-INFO +1 -1
- {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0/dragon_ml_toolbox.egg-info}/PKG-INFO +1 -1
- {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ML_utilities.py +13 -2
- {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/data_exploration.py +1 -0
- {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/optimization_tools.py +3 -3
- {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/utilities.py +49 -0
- {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/pyproject.toml +1 -1
- {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/LICENSE +0 -0
- {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/LICENSE-THIRD-PARTY.md +0 -0
- {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/README.md +0 -0
- {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/dragon_ml_toolbox.egg-info/SOURCES.txt +0 -0
- {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/dragon_ml_toolbox.egg-info/dependency_links.txt +0 -0
- {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/dragon_ml_toolbox.egg-info/requires.txt +0 -0
- {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/dragon_ml_toolbox.egg-info/top_level.txt +0 -0
- {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ETL_cleaning.py +0 -0
- {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ETL_engineering.py +0 -0
- {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/GUI_tools.py +0 -0
- {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/MICE_imputation.py +0 -0
- {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ML_callbacks.py +0 -0
- {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ML_datasetmaster.py +0 -0
- {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ML_evaluation.py +0 -0
- {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ML_evaluation_multi.py +0 -0
- {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ML_inference.py +0 -0
- {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ML_models.py +0 -0
- {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ML_optimization.py +0 -0
- {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ML_scaler.py +0 -0
- {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ML_simple_optimization.py +0 -0
- {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ML_trainer.py +0 -0
- {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/PSO_optimization.py +0 -0
- {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/RNN_forecast.py +0 -0
- {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/SQL.py +0 -0
- {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/VIF_factor.py +0 -0
- {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/__init__.py +0 -0
- {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/_logger.py +0 -0
- {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/_script_info.py +0 -0
- {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/constants.py +0 -0
- {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/custom_logger.py +0 -0
- {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ensemble_evaluation.py +0 -0
- {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ensemble_inference.py +0 -0
- {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/ensemble_learning.py +0 -0
- {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/handle_excel.py +0 -0
- {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/keys.py +0 -0
- {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/math_utilities.py +0 -0
- {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/path_manager.py +0 -0
- {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/ml_tools/serde.py +0 -0
- {dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/setup.cfg +0 -0
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
import pandas as pd
|
|
2
2
|
from pathlib import Path
|
|
3
|
-
from typing import Union, Any
|
|
3
|
+
from typing import Union, Any, Optional
|
|
4
4
|
|
|
5
5
|
from .path_manager import make_fullpath, list_subdirectories, list_files_by_extension
|
|
6
6
|
from ._script_info import _script_info
|
|
7
7
|
from ._logger import _LOGGER
|
|
8
8
|
from .keys import DatasetKeys, PytorchModelArchitectureKeys, PytorchArtifactPathKeys, SHAPKeys
|
|
9
9
|
from .utilities import load_dataframe
|
|
10
|
+
from .custom_logger import save_list_strings
|
|
10
11
|
|
|
11
12
|
|
|
12
13
|
__all__ = [
|
|
@@ -139,6 +140,7 @@ def find_model_artifacts(target_directory: Union[str,Path], load_scaler: bool, v
|
|
|
139
140
|
def select_features_by_shap(
|
|
140
141
|
root_directory: Union[str, Path],
|
|
141
142
|
shap_threshold: float,
|
|
143
|
+
log_feature_names_directory: Optional[Union[str, Path]],
|
|
142
144
|
verbose: bool = True) -> list[str]:
|
|
143
145
|
"""
|
|
144
146
|
Scans subdirectories to find SHAP summary CSVs, then extracts feature
|
|
@@ -148,11 +150,13 @@ def select_features_by_shap(
|
|
|
148
150
|
importance scores aggregated from multiple models.
|
|
149
151
|
|
|
150
152
|
Args:
|
|
151
|
-
root_directory (
|
|
153
|
+
root_directory (str | Path):
|
|
152
154
|
The path to the root directory that contains model subdirectories.
|
|
153
155
|
shap_threshold (float):
|
|
154
156
|
The minimum mean absolute SHAP value for a feature to be included
|
|
155
157
|
in the final list.
|
|
158
|
+
log_feature_names_directory (str | Path | None):
|
|
159
|
+
If given, saves the chosen feature names as a .txt file in this directory.
|
|
156
160
|
|
|
157
161
|
Returns:
|
|
158
162
|
list[str]:
|
|
@@ -211,6 +215,13 @@ def select_features_by_shap(
|
|
|
211
215
|
final_features = sorted(list(master_feature_set))
|
|
212
216
|
if verbose:
|
|
213
217
|
_LOGGER.info(f"Selected {len(final_features)} unique features across all files.")
|
|
218
|
+
|
|
219
|
+
if log_feature_names_directory is not None:
|
|
220
|
+
save_names_path = make_fullpath(log_feature_names_directory, make=True, enforce="directory")
|
|
221
|
+
save_list_strings(list_strings=final_features,
|
|
222
|
+
directory=save_names_path,
|
|
223
|
+
filename=DatasetKeys.FEATURE_NAMES,
|
|
224
|
+
verbose=verbose)
|
|
214
225
|
|
|
215
226
|
return final_features
|
|
216
227
|
|
|
@@ -362,6 +362,7 @@ def encode_categorical_features(
|
|
|
362
362
|
encode_nulls (bool): If True, encodes Null values as a distinct category
|
|
363
363
|
"Other" with a value of 0. Other categories start from 1.
|
|
364
364
|
If False, Nulls are ignored and categories start from 0.
|
|
365
|
+
Note: Use False when encoding binary values with missing entries.
|
|
365
366
|
split_resulting_dataset (bool): If True, returns two separate DataFrames:
|
|
366
367
|
one with non-categorical columns and one with the encoded columns.
|
|
367
368
|
If False, returns a single DataFrame with all columns.
|
|
@@ -98,7 +98,7 @@ def create_optimization_bounds(
|
|
|
98
98
|
|
|
99
99
|
# 3. Populate categorical bounds (Index-based)
|
|
100
100
|
# The indices in categorical_map (e.g., {2: 4}) directly correspond
|
|
101
|
-
# to the indices in
|
|
101
|
+
# to the indices in the `feature_names` list.
|
|
102
102
|
for index, cardinality in categorical_map.items():
|
|
103
103
|
if not (0 <= index < total_features):
|
|
104
104
|
_LOGGER.error(f"Categorical index {index} is out of range for the {total_features} features.")
|
|
@@ -125,8 +125,8 @@ def create_optimization_bounds(
|
|
|
125
125
|
# Map name to its index in the *feature-only* list
|
|
126
126
|
index = feature_names.index(name)
|
|
127
127
|
except ValueError:
|
|
128
|
-
_LOGGER.
|
|
129
|
-
|
|
128
|
+
_LOGGER.warning(f"Feature name '{name}' from 'continuous_bounds_map' not found in the CSV's feature columns.")
|
|
129
|
+
continue
|
|
130
130
|
|
|
131
131
|
if lower_bounds[index] is not None:
|
|
132
132
|
# This index was already set by the categorical map
|
|
@@ -12,6 +12,7 @@ from ._logger import _LOGGER
|
|
|
12
12
|
# Keep track of available tools
|
|
13
13
|
__all__ = [
|
|
14
14
|
"load_dataframe",
|
|
15
|
+
"load_dataframe_greedy",
|
|
15
16
|
"yield_dataframes_from_dir",
|
|
16
17
|
"merge_dataframes",
|
|
17
18
|
"save_dataframe_filename",
|
|
@@ -124,6 +125,54 @@ def load_dataframe(
|
|
|
124
125
|
return df, df_name # type: ignore
|
|
125
126
|
|
|
126
127
|
|
|
128
|
+
def load_dataframe_greedy(directory: Union[str, Path],
|
|
129
|
+
use_columns: Optional[list[str]] = None,
|
|
130
|
+
all_strings: bool = False,
|
|
131
|
+
verbose: bool = True) -> pd.DataFrame:
|
|
132
|
+
"""
|
|
133
|
+
Greedily loads the first found CSV file from a directory into a Pandas DataFrame.
|
|
134
|
+
|
|
135
|
+
This function scans the specified directory for any CSV files. It will
|
|
136
|
+
attempt to load the *first* CSV file it finds using the `load_dataframe`
|
|
137
|
+
function as a Pandas DataFrame.
|
|
138
|
+
|
|
139
|
+
Args:
|
|
140
|
+
directory (str, Path):
|
|
141
|
+
The path to the directory to search for a CSV file.
|
|
142
|
+
use_columns (list[str] | None):
|
|
143
|
+
A list of column names to load. If None, all columns are loaded.
|
|
144
|
+
all_strings (bool):
|
|
145
|
+
If True, loads all columns as string data types.
|
|
146
|
+
|
|
147
|
+
Returns:
|
|
148
|
+
pd.DataFrame:
|
|
149
|
+
A pandas DataFrame loaded from the first CSV file found.
|
|
150
|
+
|
|
151
|
+
Raises:
|
|
152
|
+
FileNotFoundError:
|
|
153
|
+
If the specified directory does not exist or the CSV file path
|
|
154
|
+
found is invalid.
|
|
155
|
+
ValueError:
|
|
156
|
+
If the loaded DataFrame is empty or `use_columns` contains
|
|
157
|
+
invalid column names.
|
|
158
|
+
"""
|
|
159
|
+
# validate directory
|
|
160
|
+
dir_path = make_fullpath(directory, enforce="directory")
|
|
161
|
+
|
|
162
|
+
# list all csv files and grab one (should be the only one)
|
|
163
|
+
csv_dict = list_csv_paths(directory=dir_path, verbose=False)
|
|
164
|
+
|
|
165
|
+
for df_path in csv_dict.values():
|
|
166
|
+
df , _df_name = load_dataframe(df_path=df_path,
|
|
167
|
+
use_columns=use_columns,
|
|
168
|
+
kind="pandas",
|
|
169
|
+
all_strings=all_strings,
|
|
170
|
+
verbose=verbose)
|
|
171
|
+
break
|
|
172
|
+
|
|
173
|
+
return df
|
|
174
|
+
|
|
175
|
+
|
|
127
176
|
def yield_dataframes_from_dir(datasets_dir: Union[str,Path], verbose: bool=True):
|
|
128
177
|
"""
|
|
129
178
|
Iterates over all CSV files in a given directory, loading each into a Pandas DataFrame.
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/dragon_ml_toolbox.egg-info/SOURCES.txt
RENAMED
|
File without changes
|
|
File without changes
|
{dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/dragon_ml_toolbox.egg-info/requires.txt
RENAMED
|
File without changes
|
{dragon_ml_toolbox-12.6.0 → dragon_ml_toolbox-12.8.0}/dragon_ml_toolbox.egg-info/top_level.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|