dragon-ml-toolbox 1.4.7__tar.gz → 2.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dragon_ml_toolbox-1.4.7 → dragon_ml_toolbox-2.0.0}/LICENSE-THIRD-PARTY.md +5 -4
- {dragon_ml_toolbox-1.4.7/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-2.0.0}/PKG-INFO +24 -14
- {dragon_ml_toolbox-1.4.7 → dragon_ml_toolbox-2.0.0}/README.md +20 -11
- {dragon_ml_toolbox-1.4.7 → dragon_ml_toolbox-2.0.0/dragon_ml_toolbox.egg-info}/PKG-INFO +24 -14
- {dragon_ml_toolbox-1.4.7 → dragon_ml_toolbox-2.0.0}/dragon_ml_toolbox.egg-info/SOURCES.txt +2 -1
- {dragon_ml_toolbox-1.4.7 → dragon_ml_toolbox-2.0.0}/dragon_ml_toolbox.egg-info/requires.txt +2 -1
- {dragon_ml_toolbox-1.4.7 → dragon_ml_toolbox-2.0.0}/ml_tools/MICE_imputation.py +1 -1
- dragon_ml_toolbox-2.0.0/ml_tools/PSO_optimization.py +490 -0
- dragon_ml_toolbox-1.4.7/ml_tools/particle_swarm_optimization.py → dragon_ml_toolbox-2.0.0/ml_tools/_particle_swarm_optimization.py +5 -1
- {dragon_ml_toolbox-1.4.7 → dragon_ml_toolbox-2.0.0}/ml_tools/data_exploration.py +1 -1
- {dragon_ml_toolbox-1.4.7 → dragon_ml_toolbox-2.0.0}/ml_tools/ensemble_learning.py +0 -1
- {dragon_ml_toolbox-1.4.7 → dragon_ml_toolbox-2.0.0}/ml_tools/handle_excel.py +1 -1
- {dragon_ml_toolbox-1.4.7 → dragon_ml_toolbox-2.0.0}/ml_tools/logger.py +1 -1
- {dragon_ml_toolbox-1.4.7 → dragon_ml_toolbox-2.0.0}/ml_tools/utilities.py +34 -0
- {dragon_ml_toolbox-1.4.7 → dragon_ml_toolbox-2.0.0}/pyproject.toml +5 -4
- {dragon_ml_toolbox-1.4.7 → dragon_ml_toolbox-2.0.0}/LICENSE +0 -0
- {dragon_ml_toolbox-1.4.7 → dragon_ml_toolbox-2.0.0}/dragon_ml_toolbox.egg-info/dependency_links.txt +0 -0
- {dragon_ml_toolbox-1.4.7 → dragon_ml_toolbox-2.0.0}/dragon_ml_toolbox.egg-info/top_level.txt +0 -0
- {dragon_ml_toolbox-1.4.7 → dragon_ml_toolbox-2.0.0}/ml_tools/VIF_factor.py +0 -0
- {dragon_ml_toolbox-1.4.7 → dragon_ml_toolbox-2.0.0}/ml_tools/__init__.py +0 -0
- {dragon_ml_toolbox-1.4.7 → dragon_ml_toolbox-2.0.0}/ml_tools/datasetmaster.py +0 -0
- {dragon_ml_toolbox-1.4.7 → dragon_ml_toolbox-2.0.0}/ml_tools/pytorch_models.py +0 -0
- {dragon_ml_toolbox-1.4.7 → dragon_ml_toolbox-2.0.0}/ml_tools/trainer.py +0 -0
- {dragon_ml_toolbox-1.4.7 → dragon_ml_toolbox-2.0.0}/ml_tools/vision_helpers.py +0 -0
- {dragon_ml_toolbox-1.4.7 → dragon_ml_toolbox-2.0.0}/setup.cfg +0 -0
|
@@ -5,10 +5,10 @@ This project depends on the following third-party packages. Each is governed by
|
|
|
5
5
|
- [pandas](https://github.com/pandas-dev/pandas/blob/main/LICENSE)
|
|
6
6
|
- [numpy](https://github.com/numpy/numpy/blob/main/LICENSE.txt)
|
|
7
7
|
- [matplotlib](https://github.com/matplotlib/matplotlib/blob/main/LICENSE/LICENSE)
|
|
8
|
-
- [seaborn](https://github.com/mwaskom/seaborn/blob/
|
|
8
|
+
- [seaborn](https://github.com/mwaskom/seaborn/blob/master/LICENSE.md)
|
|
9
9
|
- [statsmodels](https://github.com/statsmodels/statsmodels/blob/main/LICENSE.txt)
|
|
10
|
-
- [ipython](https://github.com/ipython/ipython/blob/main/
|
|
11
|
-
- [ipykernel](https://github.com/ipython/ipykernel/blob/main/
|
|
10
|
+
- [ipython](https://github.com/ipython/ipython/blob/main/LICENSE)
|
|
11
|
+
- [ipykernel](https://github.com/ipython/ipykernel/blob/main/LICENSE)
|
|
12
12
|
- [notebook](https://github.com/jupyter/notebook/blob/main/LICENSE)
|
|
13
13
|
- [jupyterlab](https://github.com/jupyterlab/jupyterlab/blob/main/LICENSE)
|
|
14
14
|
- [ipywidgets](https://github.com/jupyter-widgets/ipywidgets/blob/main/LICENSE)
|
|
@@ -24,5 +24,6 @@ This project depends on the following third-party packages. Each is governed by
|
|
|
24
24
|
- [openpyxl](https://github.com/chronossc/openpyxl/blob/main/LICENSE)
|
|
25
25
|
- [miceforest](https://github.com/AnotherSamWilson/miceforest/blob/main/LICENSE)
|
|
26
26
|
- [polars](https://github.com/pola-rs/polars/blob/main/LICENSE)
|
|
27
|
-
- [plotnine](https://github.com/has2k1/plotnine/blob/main/LICENSE
|
|
27
|
+
- [plotnine](https://github.com/has2k1/plotnine/blob/main/LICENSE)
|
|
28
28
|
- [pyswarm](https://pythonhosted.org/pyswarm/#license)
|
|
29
|
+
- [tqdm](https://github.com/tqdm/tqdm/blob/master/LICENSE)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dragon-ml-toolbox
|
|
3
|
-
Version:
|
|
3
|
+
Version: 2.0.0
|
|
4
4
|
Summary: A collection of tools for data science and machine learning projects
|
|
5
5
|
Author-email: Karl Loza <luigiloza@gmail.com>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -8,7 +8,7 @@ Project-URL: Homepage, https://github.com/DrAg0n-BoRn/ML_tools
|
|
|
8
8
|
Project-URL: Changelog, https://github.com/DrAg0n-BoRn/ML_tools/blob/master/CHANGELOG.md
|
|
9
9
|
Classifier: Programming Language :: Python :: 3
|
|
10
10
|
Classifier: Operating System :: OS Independent
|
|
11
|
-
Requires-Python: >=3.
|
|
11
|
+
Requires-Python: >=3.10
|
|
12
12
|
Description-Content-Type: text/markdown
|
|
13
13
|
License-File: LICENSE
|
|
14
14
|
License-File: LICENSE-THIRD-PARTY.md
|
|
@@ -32,9 +32,10 @@ Requires-Dist: joblib
|
|
|
32
32
|
Requires-Dist: xgboost
|
|
33
33
|
Requires-Dist: lightgbm<=4.5.0
|
|
34
34
|
Requires-Dist: shap
|
|
35
|
+
Requires-Dist: tqdm>=4.0
|
|
36
|
+
Requires-Dist: Pillow
|
|
35
37
|
Provides-Extra: pytorch
|
|
36
38
|
Requires-Dist: torch; extra == "pytorch"
|
|
37
|
-
Requires-Dist: Pillow; extra == "pytorch"
|
|
38
39
|
Requires-Dist: torchvision; extra == "pytorch"
|
|
39
40
|
Dynamic: license-file
|
|
40
41
|
|
|
@@ -49,7 +50,7 @@ A collection of Python utilities for data science and machine learning, structur
|
|
|
49
50
|
|
|
50
51
|
## Installation
|
|
51
52
|
|
|
52
|
-
**Python 3.
|
|
53
|
+
**Python 3.10+ recommended.**
|
|
53
54
|
|
|
54
55
|
### Via PyPI
|
|
55
56
|
|
|
@@ -59,6 +60,16 @@ Install the latest stable release from PyPI:
|
|
|
59
60
|
pip install dragon-ml-tools
|
|
60
61
|
```
|
|
61
62
|
|
|
63
|
+
### Via GitHub (Editable)
|
|
64
|
+
|
|
65
|
+
Clone the repository and install in editable mode with optional dependencies:
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
git clone https://github.com/DrAg0n-BoRn/ML_tools.git
|
|
69
|
+
cd ML_tools
|
|
70
|
+
pip install -e .
|
|
71
|
+
```
|
|
72
|
+
|
|
62
73
|
### Via conda-forge
|
|
63
74
|
|
|
64
75
|
Install from the conda-forge channel:
|
|
@@ -66,22 +77,21 @@ Install from the conda-forge channel:
|
|
|
66
77
|
```bash
|
|
67
78
|
conda install -c conda-forge dragon-ml-toolbox
|
|
68
79
|
```
|
|
80
|
+
**Note:** This version is outdated or broken due to dependency incompatibilities.
|
|
69
81
|
|
|
70
|
-
|
|
82
|
+
## Optional dependencies
|
|
83
|
+
|
|
84
|
+
**PyTorch**, which provides different builds depending on the **platform** and **hardware acceleration** (e.g., CUDA for NVIDIA GPUs on Linux/Windows, or MPS for Apple Silicon on macOS).
|
|
85
|
+
|
|
86
|
+
Install the default CPU-only version with
|
|
71
87
|
|
|
72
88
|
```bash
|
|
73
89
|
pip install dragon-ml-tools[pytorch]
|
|
74
90
|
```
|
|
75
91
|
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
Clone the repository and install in editable mode with optional dependencies:
|
|
92
|
+
To make use of GPU acceleration use the official PyTorch installation instructions:
|
|
79
93
|
|
|
80
|
-
|
|
81
|
-
git clone https://github.com/DrAg0n-BoRn/ML_tools.git
|
|
82
|
-
cd ML_tools
|
|
83
|
-
pip install -e .
|
|
84
|
-
```
|
|
94
|
+
[PyTorch Instructions](https://pytorch.org/get-started/locally/)
|
|
85
95
|
|
|
86
96
|
## Usage
|
|
87
97
|
|
|
@@ -101,7 +111,7 @@ ensemble_learning
|
|
|
101
111
|
handle_excel
|
|
102
112
|
logger
|
|
103
113
|
MICE_imputation
|
|
104
|
-
|
|
114
|
+
PSO_optimization
|
|
105
115
|
trainer
|
|
106
116
|
utilities
|
|
107
117
|
VIF_factor
|
|
@@ -9,7 +9,7 @@ A collection of Python utilities for data science and machine learning, structur
|
|
|
9
9
|
|
|
10
10
|
## Installation
|
|
11
11
|
|
|
12
|
-
**Python 3.
|
|
12
|
+
**Python 3.10+ recommended.**
|
|
13
13
|
|
|
14
14
|
### Via PyPI
|
|
15
15
|
|
|
@@ -19,6 +19,16 @@ Install the latest stable release from PyPI:
|
|
|
19
19
|
pip install dragon-ml-tools
|
|
20
20
|
```
|
|
21
21
|
|
|
22
|
+
### Via GitHub (Editable)
|
|
23
|
+
|
|
24
|
+
Clone the repository and install in editable mode with optional dependencies:
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
git clone https://github.com/DrAg0n-BoRn/ML_tools.git
|
|
28
|
+
cd ML_tools
|
|
29
|
+
pip install -e .
|
|
30
|
+
```
|
|
31
|
+
|
|
22
32
|
### Via conda-forge
|
|
23
33
|
|
|
24
34
|
Install from the conda-forge channel:
|
|
@@ -26,22 +36,21 @@ Install from the conda-forge channel:
|
|
|
26
36
|
```bash
|
|
27
37
|
conda install -c conda-forge dragon-ml-toolbox
|
|
28
38
|
```
|
|
39
|
+
**Note:** This version is outdated or broken due to dependency incompatibilities.
|
|
29
40
|
|
|
30
|
-
|
|
41
|
+
## Optional dependencies
|
|
42
|
+
|
|
43
|
+
**PyTorch**, which provides different builds depending on the **platform** and **hardware acceleration** (e.g., CUDA for NVIDIA GPUs on Linux/Windows, or MPS for Apple Silicon on macOS).
|
|
44
|
+
|
|
45
|
+
Install the default CPU-only version with
|
|
31
46
|
|
|
32
47
|
```bash
|
|
33
48
|
pip install dragon-ml-tools[pytorch]
|
|
34
49
|
```
|
|
35
50
|
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
Clone the repository and install in editable mode with optional dependencies:
|
|
51
|
+
To make use of GPU acceleration use the official PyTorch installation instructions:
|
|
39
52
|
|
|
40
|
-
|
|
41
|
-
git clone https://github.com/DrAg0n-BoRn/ML_tools.git
|
|
42
|
-
cd ML_tools
|
|
43
|
-
pip install -e .
|
|
44
|
-
```
|
|
53
|
+
[PyTorch Instructions](https://pytorch.org/get-started/locally/)
|
|
45
54
|
|
|
46
55
|
## Usage
|
|
47
56
|
|
|
@@ -61,7 +70,7 @@ ensemble_learning
|
|
|
61
70
|
handle_excel
|
|
62
71
|
logger
|
|
63
72
|
MICE_imputation
|
|
64
|
-
|
|
73
|
+
PSO_optimization
|
|
65
74
|
trainer
|
|
66
75
|
utilities
|
|
67
76
|
VIF_factor
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dragon-ml-toolbox
|
|
3
|
-
Version:
|
|
3
|
+
Version: 2.0.0
|
|
4
4
|
Summary: A collection of tools for data science and machine learning projects
|
|
5
5
|
Author-email: Karl Loza <luigiloza@gmail.com>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -8,7 +8,7 @@ Project-URL: Homepage, https://github.com/DrAg0n-BoRn/ML_tools
|
|
|
8
8
|
Project-URL: Changelog, https://github.com/DrAg0n-BoRn/ML_tools/blob/master/CHANGELOG.md
|
|
9
9
|
Classifier: Programming Language :: Python :: 3
|
|
10
10
|
Classifier: Operating System :: OS Independent
|
|
11
|
-
Requires-Python: >=3.
|
|
11
|
+
Requires-Python: >=3.10
|
|
12
12
|
Description-Content-Type: text/markdown
|
|
13
13
|
License-File: LICENSE
|
|
14
14
|
License-File: LICENSE-THIRD-PARTY.md
|
|
@@ -32,9 +32,10 @@ Requires-Dist: joblib
|
|
|
32
32
|
Requires-Dist: xgboost
|
|
33
33
|
Requires-Dist: lightgbm<=4.5.0
|
|
34
34
|
Requires-Dist: shap
|
|
35
|
+
Requires-Dist: tqdm>=4.0
|
|
36
|
+
Requires-Dist: Pillow
|
|
35
37
|
Provides-Extra: pytorch
|
|
36
38
|
Requires-Dist: torch; extra == "pytorch"
|
|
37
|
-
Requires-Dist: Pillow; extra == "pytorch"
|
|
38
39
|
Requires-Dist: torchvision; extra == "pytorch"
|
|
39
40
|
Dynamic: license-file
|
|
40
41
|
|
|
@@ -49,7 +50,7 @@ A collection of Python utilities for data science and machine learning, structur
|
|
|
49
50
|
|
|
50
51
|
## Installation
|
|
51
52
|
|
|
52
|
-
**Python 3.
|
|
53
|
+
**Python 3.10+ recommended.**
|
|
53
54
|
|
|
54
55
|
### Via PyPI
|
|
55
56
|
|
|
@@ -59,6 +60,16 @@ Install the latest stable release from PyPI:
|
|
|
59
60
|
pip install dragon-ml-tools
|
|
60
61
|
```
|
|
61
62
|
|
|
63
|
+
### Via GitHub (Editable)
|
|
64
|
+
|
|
65
|
+
Clone the repository and install in editable mode with optional dependencies:
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
git clone https://github.com/DrAg0n-BoRn/ML_tools.git
|
|
69
|
+
cd ML_tools
|
|
70
|
+
pip install -e .
|
|
71
|
+
```
|
|
72
|
+
|
|
62
73
|
### Via conda-forge
|
|
63
74
|
|
|
64
75
|
Install from the conda-forge channel:
|
|
@@ -66,22 +77,21 @@ Install from the conda-forge channel:
|
|
|
66
77
|
```bash
|
|
67
78
|
conda install -c conda-forge dragon-ml-toolbox
|
|
68
79
|
```
|
|
80
|
+
**Note:** This version is outdated or broken due to dependency incompatibilities.
|
|
69
81
|
|
|
70
|
-
|
|
82
|
+
## Optional dependencies
|
|
83
|
+
|
|
84
|
+
**PyTorch**, which provides different builds depending on the **platform** and **hardware acceleration** (e.g., CUDA for NVIDIA GPUs on Linux/Windows, or MPS for Apple Silicon on macOS).
|
|
85
|
+
|
|
86
|
+
Install the default CPU-only version with
|
|
71
87
|
|
|
72
88
|
```bash
|
|
73
89
|
pip install dragon-ml-tools[pytorch]
|
|
74
90
|
```
|
|
75
91
|
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
Clone the repository and install in editable mode with optional dependencies:
|
|
92
|
+
To make use of GPU acceleration use the official PyTorch installation instructions:
|
|
79
93
|
|
|
80
|
-
|
|
81
|
-
git clone https://github.com/DrAg0n-BoRn/ML_tools.git
|
|
82
|
-
cd ML_tools
|
|
83
|
-
pip install -e .
|
|
84
|
-
```
|
|
94
|
+
[PyTorch Instructions](https://pytorch.org/get-started/locally/)
|
|
85
95
|
|
|
86
96
|
## Usage
|
|
87
97
|
|
|
@@ -101,7 +111,7 @@ ensemble_learning
|
|
|
101
111
|
handle_excel
|
|
102
112
|
logger
|
|
103
113
|
MICE_imputation
|
|
104
|
-
|
|
114
|
+
PSO_optimization
|
|
105
115
|
trainer
|
|
106
116
|
utilities
|
|
107
117
|
VIF_factor
|
|
@@ -8,14 +8,15 @@ dragon_ml_toolbox.egg-info/dependency_links.txt
|
|
|
8
8
|
dragon_ml_toolbox.egg-info/requires.txt
|
|
9
9
|
dragon_ml_toolbox.egg-info/top_level.txt
|
|
10
10
|
ml_tools/MICE_imputation.py
|
|
11
|
+
ml_tools/PSO_optimization.py
|
|
11
12
|
ml_tools/VIF_factor.py
|
|
12
13
|
ml_tools/__init__.py
|
|
14
|
+
ml_tools/_particle_swarm_optimization.py
|
|
13
15
|
ml_tools/data_exploration.py
|
|
14
16
|
ml_tools/datasetmaster.py
|
|
15
17
|
ml_tools/ensemble_learning.py
|
|
16
18
|
ml_tools/handle_excel.py
|
|
17
19
|
ml_tools/logger.py
|
|
18
|
-
ml_tools/particle_swarm_optimization.py
|
|
19
20
|
ml_tools/pytorch_models.py
|
|
20
21
|
ml_tools/trainer.py
|
|
21
22
|
ml_tools/utilities.py
|
|
@@ -3,7 +3,7 @@ import miceforest as mf
|
|
|
3
3
|
import os
|
|
4
4
|
import matplotlib.pyplot as plt
|
|
5
5
|
import numpy as np
|
|
6
|
-
from
|
|
6
|
+
from .utilities import load_dataframe, list_csv_paths, sanitize_filename, _script_info, merge_dataframes, save_dataframe, threshold_binary_values
|
|
7
7
|
from plotnine import ggplot, labs, theme, element_blank # type: ignore
|
|
8
8
|
from typing import Optional
|
|
9
9
|
|
|
@@ -0,0 +1,490 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import os
|
|
3
|
+
import xgboost as xgb
|
|
4
|
+
import lightgbm as lgb
|
|
5
|
+
from sklearn.ensemble import HistGradientBoostingRegressor
|
|
6
|
+
from sklearn.base import ClassifierMixin
|
|
7
|
+
from typing import Literal, Union, Tuple, Dict, Optional
|
|
8
|
+
import pandas as pd
|
|
9
|
+
from copy import deepcopy
|
|
10
|
+
from .utilities import _script_info, threshold_binary_values, threshold_binary_values_batch, deserialize_object, list_files_by_extension, save_dataframe
|
|
11
|
+
import torch
|
|
12
|
+
from tqdm import trange
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
__all__ = [
|
|
16
|
+
"ObjectiveFunction",
|
|
17
|
+
"multiple_objective_functions_from_dir",
|
|
18
|
+
"run_pso"
|
|
19
|
+
]
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class ObjectiveFunction():
    """
    Callable objective function designed for optimizing continuous outputs from tree-based regression models.

    The target serialized file (joblib) must include a trained tree-based 'model'. Additionally 'feature_names' and 'target_name' will be parsed if present.

    Parameters
    ----------
    trained_model_path : str
        Path to a serialized artifact (joblib) holding a model compatible with scikit-learn-like `.predict`.
    add_noise : bool
        Whether to apply multiplicative noise to the input features during evaluation.
    task : (Literal["maximization", "minimization"])
        Whether to maximize or minimize the target.
    binary_features : int
        Number of binary features located at the END of the feature vector. Model should be trained with continuous features first, followed by binary.

    Raises
    ------
    ValueError
        If `task` is not one of the two supported values, if the artifact holds no model,
        or if the loaded model is a classifier.
    TypeError
        If the artifact could not be loaded (deserialization returned None).
    """
    def __init__(self, trained_model_path: str, add_noise: bool, task: Literal["maximization", "minimization"], binary_features: int) -> None:
        # Reject unknown tasks up front: any other string would silently behave as
        # "minimization" in __call__ and the sign would never be flipped back.
        if task not in ("maximization", "minimization"):
            raise ValueError(f"Invalid task '{task}'. Expected 'maximization' or 'minimization'.")
        self.binary_features = binary_features
        self.is_hybrid = binary_features > 0
        self.use_noise = add_noise
        self._artifact = deserialize_object(trained_model_path, verbose=False, raise_on_error=True)
        self.model = self._get_from_artifact('model')
        self.feature_names: Optional[list[str]] = self._get_from_artifact('feature_names') # type: ignore
        self.target_name: Optional[str] = self._get_from_artifact('target_name') # type: ignore
        self.task = task
        self.check_model() # check for classification models and None values

    def __call__(self, features_array: np.ndarray) -> np.ndarray:
        """
        Batched evaluation for PSO. Accepts 2D array (n_samples, n_features).

        Applies optional noise and hybrid binary thresholding, casts the batch to
        float32 and calls the model's `.predict`.

        Returns
        -------
        np.ndarray
            Model predictions for the batch, negated when the task is "maximization"
            (the optimizer minimizes).
        """
        assert features_array.ndim == 2, f"Expected 2D array, got shape {features_array.shape}"

        # Apply noise if enabled
        if self.use_noise:
            features_array = self.add_noise(features_array)

        # Apply binary thresholding if enabled
        if self.is_hybrid:
            features_array = threshold_binary_values_batch(features_array, self.binary_features)

        # Ensure correct type
        features_array = features_array.astype(np.float32)

        # Evaluate
        result = self.model.predict(features_array) # type: ignore

        # Flip sign if maximizing
        if self.task == "maximization":
            return -result
        return result

    def add_noise(self, features_array: np.ndarray) -> np.ndarray:
        """
        Apply multiplicative noise to input feature batch (2D).

        Every noised value is multiplied by a factor drawn uniformly from [0.95, 1.05).
        Binary features (the trailing `binary_features` columns, if any) are excluded
        from noise injection and returned unchanged.

        Parameters
        ----------
        features_array : np.ndarray
            Input array of shape (batch_size, n_features)

        Returns
        -------
        np.ndarray
            Noised array of same shape
        """
        assert features_array.ndim == 2, "Expected 2D array for batch noise injection"

        if self.binary_features > 0:
            split_idx = -self.binary_features
            cont_part = features_array[:, :split_idx]
            bin_part = features_array[:, split_idx:]

            noise = np.random.uniform(0.95, 1.05, size=cont_part.shape)
            cont_noised = cont_part * noise

            return np.hstack([cont_noised, bin_part])
        else:
            noise = np.random.uniform(0.95, 1.05, size=features_array.shape)
            return features_array * noise

    def check_model(self):
        """
        Validate the loaded model.

        Raises
        ------
        ValueError
            If no model was found in the artifact or the model is a classifier.
        """
        if self.model is None:
            raise ValueError("Loaded model is None")
        if isinstance(self.model, (ClassifierMixin, xgb.XGBClassifier, lgb.LGBMClassifier)):
            raise ValueError(f"[Model Check Failed] ❌\nThe loaded model ({type(self.model).__name__}) is a Classifier.\nOptimization is not suitable for standard classification tasks.")

    def _get_from_artifact(self, key: str):
        """
        Read `key` from the deserialized artifact.

        - 'model' is returned exactly as stored (None when the key is absent).
        - 'feature_names' is returned only when it is a non-empty list, otherwise None.
        - Any other key is returned when truthy, otherwise None.

        Raises
        ------
        TypeError
            If the artifact itself is None.
        """
        if self._artifact is None:
            raise TypeError("Load model error")
        val = self._artifact.get(key)
        if key == "model":
            # Do not test the estimator's truthiness: objects that define
            # __len__/__bool__ could be dropped or raise during the check.
            return val
        if key == "feature_names":
            return val if isinstance(val, list) and val else None
        return val if val else None

    def __repr__(self):
        return (f"<ObjectiveFunction(model={type(self.model).__name__}, use_noise={self.use_noise}, is_hybrid={self.is_hybrid}, task='{self.task}')>")
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def multiple_objective_functions_from_dir(directory: str, add_noise: bool, task: Literal["maximization", "minimization"], binary_features: int):
    """
    Build one `ObjectiveFunction` per serialized model found in a directory.

    Every `.joblib` file reported by `list_files_by_extension` is wrapped in an
    `ObjectiveFunction` created with the same `add_noise`, `task` and
    `binary_features` settings.

    Parameters:
        directory (str) : Path to the directory containing `.joblib` files (serialized models).
        add_noise (bool) : Whether to apply multiplicative noise to the input features during evaluation.
        task (Literal["maximization", "minimization"]) : Defines the nature of the optimization task.
        binary_features (int) : Number of binary features expected by each objective function.

    Returns:
        (tuple[list[ObjectiveFunction], list[str]]) : A tuple containing:
            - list of `ObjectiveFunction` instances.
            - list of the corresponding file names, in the same order.
    """
    model_paths = list_files_by_extension(directory=directory, extension='joblib')

    # A mapping iterates its keys and values in the same order, so both lists stay aligned.
    loaded_functions = [
        ObjectiveFunction(trained_model_path=model_path,
                          add_noise=add_noise,
                          task=task,
                          binary_features=binary_features)
        for model_path in model_paths.values()
    ]
    loaded_names = list(model_paths.keys())
    return loaded_functions, loaded_names
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def _set_boundaries(lower_boundaries: list[float], upper_boundaries: list[float]):
    """
    Check that both bound lists are non-empty and equally long, then convert them.

    Returns
    -------
    tuple
        `(lower, upper)` as NumPy arrays built from the two input lists.
    """
    n_lower, n_upper = len(lower_boundaries), len(upper_boundaries)
    assert n_lower == n_upper, "Lower and upper boundaries must have the same length."
    assert n_lower >= 1, "At least one boundary pair is required."
    return np.array(lower_boundaries), np.array(upper_boundaries)
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def _set_feature_names(size: int, names: Union[list[str], None]):
    """
    Return the feature names to use for `size` features.

    When `names` is given it must contain exactly `size` entries and is returned
    unchanged; otherwise generic names "1", "2", ..., str(size) are generated.
    """
    if names is not None:
        assert len(names) == size, "List with feature names do not match the number of features"
        return names
    return list(map(str, range(1, size + 1)))
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def _save_results(*dicts, save_dir: str, target_name: str):
    """
    Merge the given result dictionaries into one table and hand it to `save_dataframe`.

    Later dictionaries override earlier ones on duplicate keys. The table is saved
    under the file name "Optimization_<target_name>" inside `save_dir`.

    Parameters
    ----------
    *dicts : dict
        Mappings of column name -> value(s). Values may be scalars (single run)
        or lists (one entry per repetition).
    save_dir : str
        Directory forwarded to `save_dataframe`.
    target_name : str
        Target name used to build the output file name.
    """
    combined_dict = dict()
    for single_dict in dicts:
        combined_dict.update(single_dict)

    # pandas cannot build a DataFrame from a dict that holds only scalars
    # ("If using all scalar values, you must pass an index"), which is the shape of
    # a single-run result. Build an explicit one-row table in that case.
    if combined_dict and all(np.ndim(value) == 0 for value in combined_dict.values()):
        df = pd.DataFrame([combined_dict])
    else:
        df = pd.DataFrame(combined_dict)

    save_dataframe(df=df, save_dir=save_dir, filename=f"Optimization_{target_name}")
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def run_pso(lower_boundaries: list[float],
            upper_boundaries: list[float],
            objective_function: ObjectiveFunction,
            save_results_dir: str,
            auto_binary_boundaries: bool=True,
            target_name: Union[str, None]=None,
            feature_names: Union[list[str], None]=None,
            swarm_size: int=200,
            max_iterations: int=1000,
            random_state: int=101,
            post_hoc_analysis: Optional[int]=3) -> Tuple[Dict[str, float | list[float]], Dict[str, float | list[float]]]:
    """
    Run Particle Swarm Optimization (PSO) on an objective function and save the best results to `save_results_dir`.

    Parameters
    ----------
    lower_boundaries : list[float]
        Lower bound of every feature in the search space.
    upper_boundaries : list[float]
        Upper bound of every feature in the search space.
    objective_function : ObjectiveFunction
        Callable wrapping the trained regression model to optimize.
    save_results_dir : str
        Directory where the results are written; created if it does not exist.
    auto_binary_boundaries : bool
        When True, a 0/1 boundary pair is appended for each of
        `objective_function.binary_features`, so the two boundary lists only need
        to describe the continuous features.
    target_name : str or None, optional
        Name of the target. Falls back to the name stored in the objective
        function, then to "Target".
    feature_names : list[str] or None, optional
        Names of the features. Falls back to the names stored in the objective
        function, then to generic names "1", "2", ...
    swarm_size : int
        Number of particles in the swarm.
    max_iterations : int
        Maximum number of optimizer iterations.
    random_state : int
        Seed handed to the optimizer. Only used for a single run
        (`post_hoc_analysis` is None or 1).
    post_hoc_analysis : int or None
        Number of times the optimization is repeated. None or 1 performs one run.

    Returns
    -------
    Tuple[Dict[str, float | list[float]], Dict[str, float | list[float]]]
        Two dictionaries, keyed by feature name and by target name respectively.

        Single run: each value is the best feature value / best target value.

        Repeated runs: each value is a list with one entry per repetition.

    Notes
    -----
    - The optimizer minimizes. For a "maximization" task the objective function
      negates its output, and the best score is flipped back here before it is
      reported.
    - The trailing binary features of the best position are passed through
      `threshold_binary_values` before being reported.
    """
    # Prefer CUDA, then Apple MPS, and fall back to the CPU.
    if torch.cuda.is_available():
        device_name = "cuda"
    elif torch.backends.mps.is_available():
        device_name = "mps"
    else:
        device_name = "cpu"
    device = torch.device(device_name)
    print(f"[PSO] Using device: '{device}'")

    # Work on copies so the caller's lists are never extended in place.
    lows = deepcopy(lower_boundaries)
    highs = deepcopy(upper_boundaries)

    n_binary = objective_function.binary_features
    if auto_binary_boundaries and n_binary > 0:
        lows.extend([0] * n_binary)
        highs.extend([1] * n_binary)

    n_features = len(lows)
    lower, upper = _set_boundaries(lows, highs)

    # Resolve names: explicit argument first, then the objective function's metadata.
    if feature_names is None:
        feature_names = objective_function.feature_names
    names = _set_feature_names(size=n_features, names=feature_names)

    if target_name is None:
        target_name = objective_function.target_name
    if target_name is None:
        target_name = "Target"

    pso_kwargs = dict(func=objective_function,
                      lb=lower,
                      ub=upper,
                      device=device,
                      swarmsize=swarm_size,
                      maxiter=max_iterations,
                      particle_output=False)

    os.makedirs(save_results_dir, exist_ok=True)

    def _optimize_once(kwargs):
        # One optimizer run: undo the maximization sign flip and threshold binary features.
        position, score, *_ = _pso(**kwargs)
        if objective_function.task == "maximization":
            score = -score
        return threshold_binary_values(position, n_binary), score

    single_run = post_hoc_analysis is None or post_hoc_analysis == 1
    if single_run:
        features, score = _optimize_once({**pso_kwargs, "seed": random_state})

        best_features_named = dict(zip(names, features))
        best_target_named = {target_name: score}

        _save_results(best_features_named, best_target_named, save_dir=save_results_dir, target_name=target_name)
        return best_features_named, best_target_named

    # Repeated runs: keep one history list per feature plus one list of scores.
    feature_history = [[] for _ in range(n_features)]
    score_history = []
    for _ in range(post_hoc_analysis):
        features, score = _optimize_once(pso_kwargs)
        for index, value in enumerate(features):
            feature_history[index].append(value)
        score_history.append(score)

    all_best_features_named = dict(zip(names, feature_history))
    all_best_targets_named = {target_name: score_history}

    _save_results(all_best_features_named, all_best_targets_named, save_dir=save_results_dir, target_name=target_name)
    return all_best_features_named, all_best_targets_named # type: ignore
|
|
333
|
+
|
|
334
|
+
|
|
335
|
+
def info():
    """Pass this module's public names (`__all__`) to `_script_info`."""
    public_names = __all__
    _script_info(public_names)
|
|
337
|
+
|
|
338
|
+
|
|
339
|
+
def _pso(func: ObjectiveFunction,
         lb: np.ndarray,
         ub: np.ndarray,
         device: torch.device,
         swarmsize: int = 100,
         maxiter: int = 100,
         omega: float = 0.729,     # Clerc and Kennedy's constriction coefficient
         phip: float = 1.49445,    # Clerc and Kennedy's constriction coefficient
         phig: float = 1.49445,    # Clerc and Kennedy's constriction coefficient
         tolerance: float = 1e-8,
         particle_output: bool = False,
         seed: Optional[int] = None):
    """
    Internal PSO implementation using PyTorch tensors for acceleration on CUDA or MPS devices.

    The swarm always *minimizes* `func`: a particle's score is considered better
    when it is strictly lower than its previous personal best.

    Parameters
    ----------
    func : callable
        Callable objective function with batched evaluation support. Must accept a 2D NumPy array
        of shape (n_particles, n_features) and return a 1D NumPy array of shape (n_particles,).
        The returned scores are flattened, so a column vector of shape (n_particles, 1)
        is accepted as well.

    lb : np.ndarray
        Lower bounds for each feature (1D array of length n_features).

    ub : np.ndarray
        Upper bounds for each feature (1D array of length n_features).

    device : torch.device
        Device on which the swarm state (positions, velocities, bests) is stored and updated.
        Positions are copied to the CPU as NumPy arrays before each call to `func`.

    swarmsize : int
        Number of particles in the swarm (i.e., batch size per iteration).

    maxiter : int
        Maximum number of iterations to perform (i.e., optimization steps).

    omega : float
        Inertia weight controlling velocity retention across iterations.
        - Typical range: [0.4, 0.9]
        - Lower values encourage convergence, higher values promote exploration.
        - The default value (0.729) comes from Clerc & Kennedy's constriction method.

    phip : float
        Cognitive acceleration coefficient.
        - Controls how strongly particles are pulled toward their own best-known positions.
        - Typical range: [0.5, 2.5]
        - Default from Clerc & Kennedy's recommended setting.

    phig : float
        Social acceleration coefficient.
        - Controls how strongly particles are pulled toward the swarm's global best.
        - Typical range: [0.5, 2.5]
        - Default from Clerc & Kennedy's recommended setting.

    tolerance : float, default=1e-8
        Early-stopping threshold. The loop stops at the first iteration in which the
        global best score changes by less than `tolerance` compared with the previous iteration.

    particle_output : bool, default=False
        If True, returns the full history of particle positions and objective scores at each
        evaluated iteration (including the iteration that triggers early stopping).

    seed : int or None, default=None
        Random seed for reproducibility, passed to `torch.manual_seed`.
        If None, the random generator is left untouched (no seeding is performed here).

    Returns
    -------
    best_position : np.ndarray
        1D array of shape (n_features,) representing the best solution found.
        Stays at its all-zeros initial value if no evaluated score was ever below +inf.

    best_score : float
        Objective value at `best_position`.

    history_positions : list[np.ndarray], optional
        Only returned if `particle_output=True`. List of particle positions per iteration.
        Each element has shape (swarmsize, n_features).

    history_scores : list[np.ndarray], optional
        Only returned if `particle_output=True`. List of objective scores per iteration,
        exactly as returned by `func`.
    """
    if seed is not None:
        torch.manual_seed(seed)

    ndim = len(lb)
    lb_t = torch.tensor(lb, dtype=torch.float32, device=device, requires_grad=False)
    ub_t = torch.tensor(ub, dtype=torch.float32, device=device, requires_grad=False)

    # Initialize positions uniformly inside the bounds; particles start at rest
    r = torch.rand((swarmsize, ndim), device=device, requires_grad=False)
    positions = lb_t + r * (ub_t - lb_t)  # shape: (swarmsize, ndim)
    velocities = torch.zeros_like(positions, requires_grad=False)

    # Initialize best positions and scores
    personal_best_positions = positions.clone()
    personal_best_scores = torch.full((swarmsize,), float('inf'), device=device, requires_grad=False)

    global_best_score = float('inf')
    global_best_position = torch.zeros(ndim, device=device, requires_grad=False)

    # History (only filled when particle_output is True; always bound so the
    # names exist regardless of the flag)
    history_positions = []
    history_scores = []

    # Main loop
    previous_best_score = float('inf')
    progress = trange(maxiter, desc="PSO", unit="iter", leave=True)  # tqdm bar
    with torch.no_grad():
        for i in progress:
            # Evaluate objective for all particles
            positions_np = positions.detach().cpu().numpy()  # shape: (swarmsize, n_features)
            scores_np = func(positions_np)  # shape: (swarmsize,)
            # Flatten so that an (n, 1) output cannot mis-broadcast against the
            # (swarmsize,) personal-best vector below.
            scores = torch.tensor(scores_np, device=device, dtype=torch.float32).reshape(-1)

            # Track history BEFORE the early-stop check so the iteration that
            # triggers the stop is not silently dropped from the returned history.
            if particle_output:
                history_positions.append(positions.detach().cpu().numpy())
                history_scores.append(scores_np)

            # Update personal bests
            improved = scores < personal_best_scores
            personal_best_scores = torch.where(improved, scores, personal_best_scores)
            personal_best_positions = torch.where(improved[:, None], positions, personal_best_positions)

            # Update global best
            min_score, min_idx = torch.min(personal_best_scores, dim=0)
            if min_score < global_best_score:
                global_best_score = min_score.item()
                global_best_position = personal_best_positions[min_idx].clone()

            # Early stopping criteria: on the first iteration the previous best is
            # +inf, so the difference is inf (or NaN) and the check cannot fire.
            if abs(previous_best_score - global_best_score) < tolerance:
                progress.set_description(f"PSO (early stop at iteration {i+1})")
                break
            previous_best_score = global_best_score

            # Velocity update (fresh random weights per particle and dimension)
            rp = torch.rand((swarmsize, ndim), device=device, requires_grad=False)
            rg = torch.rand((swarmsize, ndim), device=device, requires_grad=False)

            cognitive = phip * rp * (personal_best_positions - positions)
            social = phig * rg * (global_best_position - positions)
            velocities = omega * velocities + cognitive + social

            # Position update
            positions = positions + velocities

            # Clamp to search space bounds
            positions = torch.max(positions, lb_t)
            positions = torch.min(positions, ub_t)

    # Move to CPU and convert to NumPy
    best_position = global_best_position.detach().cpu().numpy()
    best_score = global_best_score

    if particle_output:
        return best_position, best_score, history_positions, history_scores
    return best_position, best_score
|
|
@@ -21,6 +21,7 @@ __all__ = [
|
|
|
21
21
|
"normalize_mixed_list",
|
|
22
22
|
"sanitize_filename",
|
|
23
23
|
"threshold_binary_values",
|
|
24
|
+
"threshold_binary_values_batch",
|
|
24
25
|
"serialize_object",
|
|
25
26
|
"deserialize_object",
|
|
26
27
|
"distribute_datasets_by_target"
|
|
@@ -356,6 +357,39 @@ def threshold_binary_values(
|
|
|
356
357
|
return tuple(result)
|
|
357
358
|
else:
|
|
358
359
|
return result
|
|
360
|
+
|
|
361
|
+
|
|
362
|
+
def threshold_binary_values_batch(
    input_array: np.ndarray,
    binary_values: int
) -> np.ndarray:
    """
    Binarize the trailing `binary_values` columns of a 2D array with a 0.5 cutoff.

    Values strictly greater than 0.5 become 1, everything else becomes 0.
    The leading (continuous) columns are passed through unchanged.

    Parameters
    ----------
    input_array : np.ndarray
        2D array with shape (batch_size, n_features).
    binary_values : int
        Number of binary features located at the END of each row.

    Returns
    -------
    np.ndarray
        New array with the same shape as the input. When `binary_values` is 0 this
        is a plain copy; otherwise the binary block is cast to int32 before being
        joined with the continuous block, so the result dtype follows NumPy's
        promotion rules for that concatenation.
    """
    assert input_array.ndim == 2, f"Expected 2D array, got {input_array.ndim}D"
    n_rows, n_cols = input_array.shape
    assert 0 <= binary_values <= n_cols, "binary_values out of valid range"

    # Nothing to threshold: hand back an independent copy
    if binary_values == 0:
        return input_array.copy()

    # Column index where the binary block begins
    split_at = n_cols - binary_values

    if split_at > 0:
        continuous = input_array[:, :split_at]
    else:
        # Every column is binary: use an empty placeholder for the continuous block
        continuous = np.empty((n_rows, 0))

    binary = (input_array[:, split_at:] > 0.5).astype(np.int32)

    return np.concatenate((continuous, binary), axis=1)
|
|
359
393
|
|
|
360
394
|
|
|
361
395
|
def serialize_object(obj: Any, save_dir: str, filename: str, verbose: bool=True, raise_on_error: bool=False) -> Optional[str]:
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "dragon-ml-toolbox"
|
|
3
|
-
version = "1.4.7"
|
|
3
|
+
version = "2.0.0"
|
|
4
4
|
description = "A collection of tools for data science and machine learning projects"
|
|
5
5
|
authors = [
|
|
6
6
|
{ name = "Karl Loza", email = "luigiloza@gmail.com" }
|
|
7
7
|
]
|
|
8
8
|
readme = "README.md"
|
|
9
|
-
requires-python = ">=3.
|
|
9
|
+
requires-python = ">=3.10"
|
|
10
10
|
license = "MIT"
|
|
11
11
|
classifiers = [
|
|
12
12
|
"Programming Language :: Python :: 3",
|
|
@@ -32,7 +32,9 @@ dependencies = [
|
|
|
32
32
|
"joblib",
|
|
33
33
|
"xgboost",
|
|
34
34
|
"lightgbm<=4.5.0",
|
|
35
|
-
"shap"
|
|
35
|
+
"shap",
|
|
36
|
+
"tqdm>=4.0",
|
|
37
|
+
"Pillow"
|
|
36
38
|
]
|
|
37
39
|
|
|
38
40
|
[project.urls]
|
|
@@ -42,7 +44,6 @@ Changelog = "https://github.com/DrAg0n-BoRn/ML_tools/blob/master/CHANGELOG.md"
|
|
|
42
44
|
[project.optional-dependencies]
|
|
43
45
|
pytorch = [
|
|
44
46
|
"torch",
|
|
45
|
-
"Pillow",
|
|
46
47
|
"torchvision"
|
|
47
48
|
]
|
|
48
49
|
|
|
File without changes
|
{dragon_ml_toolbox-1.4.7 → dragon_ml_toolbox-2.0.0}/dragon_ml_toolbox.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
{dragon_ml_toolbox-1.4.7 → dragon_ml_toolbox-2.0.0}/dragon_ml_toolbox.egg-info/top_level.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|