maxwailab 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- maxwailab-1.0.0/LICENSE.txt +21 -0
- maxwailab-1.0.0/PKG-INFO +275 -0
- maxwailab-1.0.0/README.md +257 -0
- maxwailab-1.0.0/pyproject.toml +32 -0
- maxwailab-1.0.0/setup.cfg +4 -0
- maxwailab-1.0.0/src/maxwailab/__init__.py +16 -0
- maxwailab-1.0.0/src/maxwailab/binning.py +312 -0
- maxwailab-1.0.0/src/maxwailab/feature_selection.py +478 -0
- maxwailab-1.0.0/src/maxwailab/hyperparameter_analysis.py +137 -0
- maxwailab-1.0.0/src/maxwailab.egg-info/PKG-INFO +275 -0
- maxwailab-1.0.0/src/maxwailab.egg-info/SOURCES.txt +12 -0
- maxwailab-1.0.0/src/maxwailab.egg-info/dependency_links.txt +1 -0
- maxwailab-1.0.0/src/maxwailab.egg-info/requires.txt +7 -0
- maxwailab-1.0.0/src/maxwailab.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Max Wienandts
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
maxwailab-1.0.0/PKG-INFO
ADDED
|
@@ -0,0 +1,275 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: maxwailab
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Bootstrap-based model stability and supervised binning toolkit
|
|
5
|
+
Author: Max Wienandts
|
|
6
|
+
Project-URL: Homepage, https://github.com/MaxWienandts/maxwailab
|
|
7
|
+
Requires-Python: >=3.10
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
License-File: LICENSE.txt
|
|
10
|
+
Requires-Dist: numpy
|
|
11
|
+
Requires-Dist: pandas
|
|
12
|
+
Requires-Dist: scikit-learn
|
|
13
|
+
Requires-Dist: lightgbm
|
|
14
|
+
Requires-Dist: matplotlib
|
|
15
|
+
Requires-Dist: seaborn
|
|
16
|
+
Requires-Dist: tqdm
|
|
17
|
+
Dynamic: license-file
|
|
18
|
+
|
|
19
|
+
# Bootstrap ML Diagnostics
|
|
20
|
+
|
|
21
|
+
A lightweight toolkit for **statistically robust model diagnostics** using **bootstrap resampling**, with utilities for:
|
|
22
|
+
|
|
23
|
+
* supervised tree binning
|
|
24
|
+
* bootstrap-based feature selection
|
|
25
|
+
* model stability analysis
|
|
26
|
+
* hyperparameter sensitivity analysis
|
|
27
|
+
|
|
28
|
+
The library focuses on **reducing overfitting and improving model interpretability** through **bootstrap distributions rather than single-point estimates**.
|
|
29
|
+
|
|
30
|
+
---
|
|
31
|
+
|
|
32
|
+
# Installation
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
pip install maxwailab
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
or
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
pip install git+https://github.com/MaxWienandts/maxwailab.git
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
---
|
|
45
|
+
|
|
46
|
+
# Core Philosophy
|
|
47
|
+
|
|
48
|
+
Most ML workflows rely on **single train/validation splits**.
|
|
49
|
+
|
|
50
|
+
This library instead uses **bootstrap resampling** to estimate:
|
|
51
|
+
|
|
52
|
+
* performance **distributions**
|
|
53
|
+
* feature **selection stability**
|
|
54
|
+
* hyperparameter **robustness**
|
|
55
|
+
|
|
56
|
+
Benefits:
|
|
57
|
+
|
|
58
|
+
* reduces variance from a single split
|
|
59
|
+
* identifies unstable variables
|
|
60
|
+
* provides confidence intervals for model performance
|
|
61
|
+
|
|
62
|
+
---
|
|
63
|
+
|
|
64
|
+
# Workflow Overview
|
|
65
|
+
|
|
66
|
+
Typical modeling workflow using this library:
|
|
67
|
+
|
|
68
|
+
```
|
|
69
|
+
1️⃣ Supervised binning (optional)
|
|
70
|
+
|
|
71
|
+
tree_supervised_binning
|
|
72
|
+
bootstrap_tree_binning_auc_analysis
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
2️⃣ Feature selection
|
|
76
|
+
|
|
77
|
+
bootstrap_lightgbm_forward_selection
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
3️⃣ Diagnostics
|
|
81
|
+
|
|
82
|
+
performance_forward_selection_boxplot
|
|
83
|
+
variable_frequency_forward_selection
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
4️⃣ Extract best variables
|
|
87
|
+
|
|
88
|
+
top_k_forward_selection_variables_by_frequency_usage
|
|
89
|
+
top_k_variables_by__forward_selection_boxplot
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
5️⃣ Hyperparameter analysis
|
|
93
|
+
|
|
94
|
+
lightgbm_hyperparameter_auc_curve_bootstrap
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
---
|
|
98
|
+
|
|
99
|
+
# Example Workflow
|
|
100
|
+
|
|
101
|
+
```python
|
|
102
|
+
import maxwailab
|
|
103
|
+
|
|
104
|
+
# --------------------------------
|
|
105
|
+
# Forward Selection with Bootstrap
|
|
106
|
+
# --------------------------------
|
|
107
|
+
|
|
108
|
+
result_bootstrap = maxwailab.bootstrap_lightgbm_forward_selection(
|
|
109
|
+
df=data,
|
|
110
|
+
target="target",
|
|
111
|
+
n_bootstrap=30,
|
|
112
|
+
n_max_variables=15,
|
|
113
|
+
metric_to_optimize="auc_roc",
|
|
114
|
+
hyperparameters=lgb_params
|
|
115
|
+
)
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
---
|
|
119
|
+
|
|
120
|
+
## Analyze performance stability
|
|
121
|
+
|
|
122
|
+
```python
|
|
123
|
+
maxwailab.performance_forward_selection_boxplot(
|
|
124
|
+
result_bootstrap["auc_roc"],
|
|
125
|
+
"AUC"
|
|
126
|
+
)
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
This visualizes how performance behaves as variables are added.
|
|
130
|
+
|
|
131
|
+
---
|
|
132
|
+
|
|
133
|
+
## Variable selection stability
|
|
134
|
+
|
|
135
|
+
```python
|
|
136
|
+
maxwailab.variable_frequency_forward_selection(
|
|
137
|
+
result_bootstrap["variables"],
|
|
138
|
+
n_bootstraps=30
|
|
139
|
+
)
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
Heatmap showing **how frequently variables appear in models of different sizes**.
|
|
143
|
+
|
|
144
|
+
---
|
|
145
|
+
|
|
146
|
+
## Extract best variables
|
|
147
|
+
|
|
148
|
+
### Based on selection frequency
|
|
149
|
+
|
|
150
|
+
```python
|
|
151
|
+
maxwailab.top_k_forward_selection_variables_by_frequency_usage(
|
|
152
|
+
result_bootstrap["variables"],
|
|
153
|
+
n_bootstraps=30,
|
|
154
|
+
k=10
|
|
155
|
+
)
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
### Based on best model performance
|
|
159
|
+
|
|
160
|
+
```python
|
|
161
|
+
variables, auc = maxwailab.top_k_variables_by__forward_selection_boxplot(
|
|
162
|
+
result_bootstrap,
|
|
163
|
+
k=6,
|
|
164
|
+
metric="auc_roc"
|
|
165
|
+
)
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
---
|
|
169
|
+
|
|
170
|
+
# Tree-based Supervised Binning
|
|
171
|
+
|
|
172
|
+
Supervised binning using decision trees.
|
|
173
|
+
|
|
174
|
+
```python
|
|
175
|
+
from maxwailab import tree_supervised_binning
|
|
176
|
+
|
|
177
|
+
tree_supervised_binning(
|
|
178
|
+
df=data,
|
|
179
|
+
feature="age",
|
|
180
|
+
target="target",
|
|
181
|
+
max_leaf_nodes=5
|
|
182
|
+
)
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
---
|
|
186
|
+
|
|
187
|
+
## Bootstrap Binning Stability
|
|
188
|
+
|
|
189
|
+
```python
|
|
190
|
+
maxwailab.bootstrap_tree_binning_auc_analysis(
|
|
191
|
+
df_train,
|
|
192
|
+
df_val,
|
|
193
|
+
feature="age",
|
|
194
|
+
target="target"
|
|
195
|
+
)
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
Evaluates how **binning performance varies across bootstrap samples**.
|
|
199
|
+
|
|
200
|
+
---
|
|
201
|
+
|
|
202
|
+
# Hyperparameter Sensitivity Analysis
|
|
203
|
+
|
|
204
|
+
Evaluate how model performance reacts to hyperparameter changes.
|
|
205
|
+
|
|
206
|
+
```python
|
|
207
|
+
maxwailab.lightgbm_hyperparameter_auc_curve_bootstrap(
|
|
208
|
+
X_train,
|
|
209
|
+
y_train,
|
|
210
|
+
X_val,
|
|
211
|
+
y_val,
|
|
212
|
+
hyperparameters=lgb_params,
|
|
213
|
+
hyperparameter_name="num_leaves",
|
|
214
|
+
hyperparameter_values=[5,10,20,40],
|
|
215
|
+
n_bootstrap=50
|
|
216
|
+
)
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
Bootstrap is applied **only to the training set** while keeping validation **fixed (out-of-time)**.
|
|
220
|
+
|
|
221
|
+
---
|
|
222
|
+
|
|
223
|
+
# Example Output
|
|
224
|
+
|
|
225
|
+
The library produces:
|
|
226
|
+
|
|
227
|
+
* performance **distributions**
|
|
228
|
+
* **boxplots**
|
|
229
|
+
* **stability heatmaps**
|
|
230
|
+
* **hyperparameter sensitivity curves**
|
|
231
|
+
|
|
232
|
+
These diagnostics help detect:
|
|
233
|
+
|
|
234
|
+
* overfitting
|
|
235
|
+
* unstable features
|
|
236
|
+
* fragile hyperparameters
|
|
237
|
+
|
|
238
|
+
---
|
|
239
|
+
|
|
240
|
+
# Module Structure
|
|
241
|
+
|
|
242
|
+
```
|
|
243
|
+
maxwailab
|
|
244
|
+
│
|
|
245
|
+
├── binning
|
|
246
|
+
│ ├── tree_supervised_binning
|
|
247
|
+
│ └── bootstrap_tree_binning_auc_analysis
|
|
248
|
+
│
|
|
249
|
+
├── feature_selection
|
|
250
|
+
│ ├── bootstrap_lightgbm_forward_selection
|
|
251
|
+
│ ├── performance_forward_selection_boxplot
|
|
252
|
+
│ ├── variable_frequency_forward_selection
|
|
253
|
+
│ ├── top_k_forward_selection_variables_by_frequency_usage
|
|
254
|
+
│ └── top_k_variables_by__forward_selection_boxplot
|
|
255
|
+
│
|
|
256
|
+
└── hyperparameter_analysis
|
|
257
|
+
└── lightgbm_hyperparameter_auc_curve_bootstrap
|
|
258
|
+
```
|
|
259
|
+
|
|
260
|
+
---
|
|
261
|
+
|
|
262
|
+
# When to Use This Library
|
|
263
|
+
|
|
264
|
+
This library is particularly useful for:
|
|
265
|
+
|
|
266
|
+
* **credit risk models**
|
|
267
|
+
* **tabular ML problems**
|
|
268
|
+
* **high-stakes predictive modeling**
|
|
269
|
+
* **interpretable ML workflows**
|
|
270
|
+
|
|
271
|
+
---
|
|
272
|
+
|
|
273
|
+
# License
|
|
274
|
+
|
|
275
|
+
MIT License
|
|
@@ -0,0 +1,257 @@
|
|
|
1
|
+
# Bootstrap ML Diagnostics
|
|
2
|
+
|
|
3
|
+
A lightweight toolkit for **statistically robust model diagnostics** using **bootstrap resampling**, with utilities for:
|
|
4
|
+
|
|
5
|
+
* supervised tree binning
|
|
6
|
+
* bootstrap-based feature selection
|
|
7
|
+
* model stability analysis
|
|
8
|
+
* hyperparameter sensitivity analysis
|
|
9
|
+
|
|
10
|
+
The library focuses on **reducing overfitting and improving model interpretability** through **bootstrap distributions rather than single-point estimates**.
|
|
11
|
+
|
|
12
|
+
---
|
|
13
|
+
|
|
14
|
+
# Installation
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
pip install maxwailab
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
or
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
pip install git+https://github.com/MaxWienandts/maxwailab.git
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
---
|
|
27
|
+
|
|
28
|
+
# Core Philosophy
|
|
29
|
+
|
|
30
|
+
Most ML workflows rely on **single train/validation splits**.
|
|
31
|
+
|
|
32
|
+
This library instead uses **bootstrap resampling** to estimate:
|
|
33
|
+
|
|
34
|
+
* performance **distributions**
|
|
35
|
+
* feature **selection stability**
|
|
36
|
+
* hyperparameter **robustness**
|
|
37
|
+
|
|
38
|
+
Benefits:
|
|
39
|
+
|
|
40
|
+
* reduces variance from a single split
|
|
41
|
+
* identifies unstable variables
|
|
42
|
+
* provides confidence intervals for model performance
|
|
43
|
+
|
|
44
|
+
---
|
|
45
|
+
|
|
46
|
+
# Workflow Overview
|
|
47
|
+
|
|
48
|
+
Typical modeling workflow using this library:
|
|
49
|
+
|
|
50
|
+
```
|
|
51
|
+
1️⃣ Supervised binning (optional)
|
|
52
|
+
|
|
53
|
+
tree_supervised_binning
|
|
54
|
+
bootstrap_tree_binning_auc_analysis
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
2️⃣ Feature selection
|
|
58
|
+
|
|
59
|
+
bootstrap_lightgbm_forward_selection
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
3️⃣ Diagnostics
|
|
63
|
+
|
|
64
|
+
performance_forward_selection_boxplot
|
|
65
|
+
variable_frequency_forward_selection
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
4️⃣ Extract best variables
|
|
69
|
+
|
|
70
|
+
top_k_forward_selection_variables_by_frequency_usage
|
|
71
|
+
top_k_variables_by__forward_selection_boxplot
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
5️⃣ Hyperparameter analysis
|
|
75
|
+
|
|
76
|
+
lightgbm_hyperparameter_auc_curve_bootstrap
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
---
|
|
80
|
+
|
|
81
|
+
# Example Workflow
|
|
82
|
+
|
|
83
|
+
```python
|
|
84
|
+
import maxwailab
|
|
85
|
+
|
|
86
|
+
# --------------------------------
|
|
87
|
+
# Forward Selection with Bootstrap
|
|
88
|
+
# --------------------------------
|
|
89
|
+
|
|
90
|
+
result_bootstrap = maxwailab.bootstrap_lightgbm_forward_selection(
|
|
91
|
+
df=data,
|
|
92
|
+
target="target",
|
|
93
|
+
n_bootstrap=30,
|
|
94
|
+
n_max_variables=15,
|
|
95
|
+
metric_to_optimize="auc_roc",
|
|
96
|
+
hyperparameters=lgb_params
|
|
97
|
+
)
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
---
|
|
101
|
+
|
|
102
|
+
## Analyze performance stability
|
|
103
|
+
|
|
104
|
+
```python
|
|
105
|
+
maxwailab.performance_forward_selection_boxplot(
|
|
106
|
+
result_bootstrap["auc_roc"],
|
|
107
|
+
"AUC"
|
|
108
|
+
)
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
This visualizes how performance behaves as variables are added.
|
|
112
|
+
|
|
113
|
+
---
|
|
114
|
+
|
|
115
|
+
## Variable selection stability
|
|
116
|
+
|
|
117
|
+
```python
|
|
118
|
+
maxwailab.variable_frequency_forward_selection(
|
|
119
|
+
result_bootstrap["variables"],
|
|
120
|
+
n_bootstraps=30
|
|
121
|
+
)
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
Heatmap showing **how frequently variables appear in models of different sizes**.
|
|
125
|
+
|
|
126
|
+
---
|
|
127
|
+
|
|
128
|
+
## Extract best variables
|
|
129
|
+
|
|
130
|
+
### Based on selection frequency
|
|
131
|
+
|
|
132
|
+
```python
|
|
133
|
+
maxwailab.top_k_forward_selection_variables_by_frequency_usage(
|
|
134
|
+
result_bootstrap["variables"],
|
|
135
|
+
n_bootstraps=30,
|
|
136
|
+
k=10
|
|
137
|
+
)
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
### Based on best model performance
|
|
141
|
+
|
|
142
|
+
```python
|
|
143
|
+
variables, auc = maxwailab.top_k_variables_by__forward_selection_boxplot(
|
|
144
|
+
result_bootstrap,
|
|
145
|
+
k=6,
|
|
146
|
+
metric="auc_roc"
|
|
147
|
+
)
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
---
|
|
151
|
+
|
|
152
|
+
# Tree-based Supervised Binning
|
|
153
|
+
|
|
154
|
+
Supervised binning using decision trees.
|
|
155
|
+
|
|
156
|
+
```python
|
|
157
|
+
from maxwailab import tree_supervised_binning
|
|
158
|
+
|
|
159
|
+
tree_supervised_binning(
|
|
160
|
+
df=data,
|
|
161
|
+
feature="age",
|
|
162
|
+
target="target",
|
|
163
|
+
max_leaf_nodes=5
|
|
164
|
+
)
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
---
|
|
168
|
+
|
|
169
|
+
## Bootstrap Binning Stability
|
|
170
|
+
|
|
171
|
+
```python
|
|
172
|
+
maxwailab.bootstrap_tree_binning_auc_analysis(
|
|
173
|
+
df_train,
|
|
174
|
+
df_val,
|
|
175
|
+
feature="age",
|
|
176
|
+
target="target"
|
|
177
|
+
)
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
Evaluates how **binning performance varies across bootstrap samples**.
|
|
181
|
+
|
|
182
|
+
---
|
|
183
|
+
|
|
184
|
+
# Hyperparameter Sensitivity Analysis
|
|
185
|
+
|
|
186
|
+
Evaluate how model performance reacts to hyperparameter changes.
|
|
187
|
+
|
|
188
|
+
```python
|
|
189
|
+
maxwailab.lightgbm_hyperparameter_auc_curve_bootstrap(
|
|
190
|
+
X_train,
|
|
191
|
+
y_train,
|
|
192
|
+
X_val,
|
|
193
|
+
y_val,
|
|
194
|
+
hyperparameters=lgb_params,
|
|
195
|
+
hyperparameter_name="num_leaves",
|
|
196
|
+
hyperparameter_values=[5,10,20,40],
|
|
197
|
+
n_bootstrap=50
|
|
198
|
+
)
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
Bootstrap is applied **only to the training set** while keeping validation **fixed (out-of-time)**.
|
|
202
|
+
|
|
203
|
+
---
|
|
204
|
+
|
|
205
|
+
# Example Output
|
|
206
|
+
|
|
207
|
+
The library produces:
|
|
208
|
+
|
|
209
|
+
* performance **distributions**
|
|
210
|
+
* **boxplots**
|
|
211
|
+
* **stability heatmaps**
|
|
212
|
+
* **hyperparameter sensitivity curves**
|
|
213
|
+
|
|
214
|
+
These diagnostics help detect:
|
|
215
|
+
|
|
216
|
+
* overfitting
|
|
217
|
+
* unstable features
|
|
218
|
+
* fragile hyperparameters
|
|
219
|
+
|
|
220
|
+
---
|
|
221
|
+
|
|
222
|
+
# Module Structure
|
|
223
|
+
|
|
224
|
+
```
|
|
225
|
+
maxwailab
|
|
226
|
+
│
|
|
227
|
+
├── binning
|
|
228
|
+
│ ├── tree_supervised_binning
|
|
229
|
+
│ └── bootstrap_tree_binning_auc_analysis
|
|
230
|
+
│
|
|
231
|
+
├── feature_selection
|
|
232
|
+
│ ├── bootstrap_lightgbm_forward_selection
|
|
233
|
+
│ ├── performance_forward_selection_boxplot
|
|
234
|
+
│ ├── variable_frequency_forward_selection
|
|
235
|
+
│ ├── top_k_forward_selection_variables_by_frequency_usage
|
|
236
|
+
│ └── top_k_variables_by__forward_selection_boxplot
|
|
237
|
+
│
|
|
238
|
+
└── hyperparameter_analysis
|
|
239
|
+
└── lightgbm_hyperparameter_auc_curve_bootstrap
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
---
|
|
243
|
+
|
|
244
|
+
# When to Use This Library
|
|
245
|
+
|
|
246
|
+
This library is particularly useful for:
|
|
247
|
+
|
|
248
|
+
* **credit risk models**
|
|
249
|
+
* **tabular ML problems**
|
|
250
|
+
* **high-stakes predictive modeling**
|
|
251
|
+
* **interpretable ML workflows**
|
|
252
|
+
|
|
253
|
+
---
|
|
254
|
+
|
|
255
|
+
# License
|
|
256
|
+
|
|
257
|
+
MIT License
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "maxwailab"
|
|
7
|
+
version = "1.0.0"
|
|
8
|
+
description = "Bootstrap-based model stability and supervised binning toolkit"
|
|
9
|
+
authors = [
|
|
10
|
+
{ name="Max Wienandts" }
|
|
11
|
+
]
|
|
12
|
+
readme = "README.md"
|
|
13
|
+
requires-python = ">=3.10"
|
|
14
|
+
|
|
15
|
+
dependencies = [
|
|
16
|
+
"numpy",
|
|
17
|
+
"pandas",
|
|
18
|
+
"scikit-learn",
|
|
19
|
+
"lightgbm",
|
|
20
|
+
"matplotlib",
|
|
21
|
+
"seaborn",
|
|
22
|
+
"tqdm"
|
|
23
|
+
]
|
|
24
|
+
|
|
25
|
+
[tool.setuptools]
|
|
26
|
+
package-dir = {"" = "src"}
|
|
27
|
+
|
|
28
|
+
[tool.setuptools.packages.find]
|
|
29
|
+
where = ["src"]
|
|
30
|
+
|
|
31
|
+
[project.urls]
|
|
32
|
+
Homepage = "https://github.com/MaxWienandts/maxwailab"
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
from .binning import (
|
|
2
|
+
tree_supervised_binning,
|
|
3
|
+
bootstrap_tree_binning_auc_analysis,
|
|
4
|
+
)
|
|
5
|
+
|
|
6
|
+
from .feature_selection import (
|
|
7
|
+
bootstrap_lightgbm_forward_selection,
|
|
8
|
+
performance_forward_selection_boxplot,
|
|
9
|
+
variable_frequency_forward_selection,
|
|
10
|
+
top_k_forward_selection_variables_by_frequency_usage,
|
|
11
|
+
top_k_variables_by__forward_selection_boxplot,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
from .hyperparameter_analysis import (
|
|
15
|
+
lightgbm_hyperparameter_auc_curve_bootstrap
|
|
16
|
+
)
|