maxwailab 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Max Wienandts
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,275 @@
1
+ Metadata-Version: 2.4
2
+ Name: maxwailab
3
+ Version: 1.0.0
4
+ Summary: Bootstrap-based model stability and supervised binning toolkit
5
+ Author: Max Wienandts
6
+ Project-URL: Homepage, https://github.com/MaxWienandts/maxwailab
7
+ Requires-Python: >=3.10
8
+ Description-Content-Type: text/markdown
9
+ License-File: LICENSE.txt
10
+ Requires-Dist: numpy
11
+ Requires-Dist: pandas
12
+ Requires-Dist: scikit-learn
13
+ Requires-Dist: lightgbm
14
+ Requires-Dist: matplotlib
15
+ Requires-Dist: seaborn
16
+ Requires-Dist: tqdm
17
+ Dynamic: license-file
18
+
19
+ # Bootstrap ML Diagnostics
20
+
21
+ A lightweight toolkit for **statistically robust model diagnostics** using **bootstrap resampling**, with utilities for:
22
+
23
+ * supervised tree binning
24
+ * bootstrap-based feature selection
25
+ * model stability analysis
26
+ * hyperparameter sensitivity analysis
27
+
28
+ The library focuses on **reducing overfitting and improving model interpretability** through **bootstrap distributions rather than single-point estimates**.
29
+
30
+ ---
31
+
32
+ # Installation
33
+
34
+ ```bash
35
+ pip install maxwailab
36
+ ```
37
+
38
+ or
39
+
40
+ ```bash
41
+ pip install git+https://github.com/MaxWienandts/maxwailab.git
42
+ ```
43
+
44
+ ---
45
+
46
+ # Core Philosophy
47
+
48
+ Most ML workflows rely on **single train/validation splits**.
49
+
50
+ This library instead uses **bootstrap resampling** to estimate:
51
+
52
+ * performance **distributions**
53
+ * feature **selection stability**
54
+ * hyperparameter **robustness**
55
+
56
+ Benefits:
57
+
58
+ * reduces variance from a single split
59
+ * identifies unstable variables
60
+ * provides confidence intervals for model performance
61
+
62
+ ---
63
+
64
+ # Workflow Overview
65
+
66
+ Typical modeling workflow using this library:
67
+
68
+ ```
69
+ 1️⃣ Supervised binning (optional)
70
+
71
+ tree_supervised_binning
72
+ bootstrap_tree_binning_auc_analysis
73
+
74
+
75
+ 2️⃣ Feature selection
76
+
77
+ bootstrap_lightgbm_forward_selection
78
+
79
+
80
+ 3️⃣ Diagnostics
81
+
82
+ performance_forward_selection_boxplot
83
+ variable_frequency_forward_selection
84
+
85
+
86
+ 4️⃣ Extract best variables
87
+
88
+ top_k_forward_selection_variables_by_frequency_usage
89
+ top_k_variables_by__forward_selection_boxplot
90
+
91
+
92
+ 5️⃣ Hyperparameter analysis
93
+
94
+ lightgbm_hyperparameter_auc_curve_bootstrap
95
+ ```
96
+
97
+ ---
98
+
99
+ # Example Workflow
100
+
101
+ ```python
102
+ import maxwailab
103
+
104
+ # --------------------------------
105
+ # Forward Selection with Bootstrap
106
+ # --------------------------------
107
+
108
+ result_bootstrap = maxwailab.bootstrap_lightgbm_forward_selection(
109
+ df=data,
110
+ target="target",
111
+ n_bootstrap=30,
112
+ n_max_variables=15,
113
+ metric_to_optimize="auc_roc",
114
+ hyperparameters=lgb_params
115
+ )
116
+ ```
117
+
118
+ ---
119
+
120
+ ## Analyze performance stability
121
+
122
+ ```python
123
+ maxwailab.performance_forward_selection_boxplot(
124
+ result_bootstrap["auc_roc"],
125
+ "AUC"
126
+ )
127
+ ```
128
+
129
+ This visualizes how performance behaves as variables are added.
130
+
131
+ ---
132
+
133
+ ## Variable selection stability
134
+
135
+ ```python
136
+ maxwailab.variable_frequency_forward_selection(
137
+ result_bootstrap["variables"],
138
+ n_bootstraps=30
139
+ )
140
+ ```
141
+
142
+ Heatmap showing **how frequently variables appear in models of different sizes**.
143
+
144
+ ---
145
+
146
+ ## Extract best variables
147
+
148
+ ### Based on selection frequency
149
+
150
+ ```python
151
+ maxwailab.top_k_forward_selection_variables_by_frequency_usage(
152
+ result_bootstrap["variables"],
153
+ n_bootstraps=30,
154
+ k=10
155
+ )
156
+ ```
157
+
158
+ ### Based on best model performance
159
+
160
+ ```python
161
+ variables, auc = maxwailab.top_k_variables_by__forward_selection_boxplot(
162
+ result_bootstrap,
163
+ k=6,
164
+ metric="auc_roc"
165
+ )
166
+ ```
167
+
168
+ ---
169
+
170
+ # Tree-based Supervised Binning
171
+
172
+ Supervised binning using decision trees.
173
+
174
+ ```python
175
+ from maxwailab import tree_supervised_binning
176
+
177
+ tree_supervised_binning(
178
+ df=data,
179
+ feature="age",
180
+ target="target",
181
+ max_leaf_nodes=5
182
+ )
183
+ ```
184
+
185
+ ---
186
+
187
+ ## Bootstrap Binning Stability
188
+
189
+ ```python
190
+ bootstrap_tree_binning_auc_analysis(
191
+ df_train,
192
+ df_val,
193
+ feature="age",
194
+ target="target"
195
+ )
196
+ ```
197
+
198
+ Evaluates how **binning performance varies across bootstrap samples**.
199
+
200
+ ---
201
+
202
+ # Hyperparameter Sensitivity Analysis
203
+
204
+ Evaluate how model performance reacts to hyperparameter changes.
205
+
206
+ ```python
207
+ lightgbm_hyperparameter_auc_curve_bootstrap(
208
+ X_train,
209
+ y_train,
210
+ X_val,
211
+ y_val,
212
+ hyperparameters=lgb_params,
213
+ hyperparameter_name="num_leaves",
214
+ hyperparameter_values=[5,10,20,40],
215
+ n_bootstrap=50
216
+ )
217
+ ```
218
+
219
+ Bootstrap is applied **only to the training set** while keeping validation **fixed (out-of-time)**.
220
+
221
+ ---
222
+
223
+ # Example Output
224
+
225
+ The library produces:
226
+
227
+ * performance **distributions**
228
+ * **boxplots**
229
+ * **stability heatmaps**
230
+ * **hyperparameter sensitivity curves**
231
+
232
+ These diagnostics help detect:
233
+
234
+ * overfitting
235
+ * unstable features
236
+ * fragile hyperparameters
237
+
238
+ ---
239
+
240
+ # Module Structure
241
+
242
+ ```
243
+ maxwailab
244
+
245
+ ├── binning
246
+ │ ├── tree_supervised_binning
247
+ │ └── bootstrap_tree_binning_auc_analysis
248
+
249
+ ├── feature_selection
250
+ │ ├── bootstrap_lightgbm_forward_selection
251
+ │ ├── performance_forward_selection_boxplot
252
+ │ ├── variable_frequency_forward_selection
253
+ │ ├── top_k_forward_selection_variables_by_frequency_usage
254
+ │ └── top_k_variables_by__forward_selection_boxplot
255
+
256
+ └── hyperparameter_analysis
257
+ └── lightgbm_hyperparameter_auc_curve_bootstrap
258
+ ```
259
+
260
+ ---
261
+
262
+ # When to Use This Library
263
+
264
+ This library is particularly useful for:
265
+
266
+ * **credit risk models**
267
+ * **tabular ML problems**
268
+ * **high-stakes predictive modeling**
269
+ * **interpretable ML workflows**
270
+
271
+ ---
272
+
273
+ # License
274
+
275
+ MIT License
@@ -0,0 +1,257 @@
1
+ # Bootstrap ML Diagnostics
2
+
3
+ A lightweight toolkit for **statistically robust model diagnostics** using **bootstrap resampling**, with utilities for:
4
+
5
+ * supervised tree binning
6
+ * bootstrap-based feature selection
7
+ * model stability analysis
8
+ * hyperparameter sensitivity analysis
9
+
10
+ The library focuses on **reducing overfitting and improving model interpretability** through **bootstrap distributions rather than single-point estimates**.
11
+
12
+ ---
13
+
14
+ # Installation
15
+
16
+ ```bash
17
+ pip install maxwailab
18
+ ```
19
+
20
+ or
21
+
22
+ ```bash
23
+ pip install git+https://github.com/MaxWienandts/maxwailab.git
24
+ ```
25
+
26
+ ---
27
+
28
+ # Core Philosophy
29
+
30
+ Most ML workflows rely on **single train/validation splits**.
31
+
32
+ This library instead uses **bootstrap resampling** to estimate:
33
+
34
+ * performance **distributions**
35
+ * feature **selection stability**
36
+ * hyperparameter **robustness**
37
+
38
+ Benefits:
39
+
40
+ * reduces variance from a single split
41
+ * identifies unstable variables
42
+ * provides confidence intervals for model performance
43
+
44
+ ---
45
+
46
+ # Workflow Overview
47
+
48
+ Typical modeling workflow using this library:
49
+
50
+ ```
51
+ 1️⃣ Supervised binning (optional)
52
+
53
+ tree_supervised_binning
54
+ bootstrap_tree_binning_auc_analysis
55
+
56
+
57
+ 2️⃣ Feature selection
58
+
59
+ bootstrap_lightgbm_forward_selection
60
+
61
+
62
+ 3️⃣ Diagnostics
63
+
64
+ performance_forward_selection_boxplot
65
+ variable_frequency_forward_selection
66
+
67
+
68
+ 4️⃣ Extract best variables
69
+
70
+ top_k_forward_selection_variables_by_frequency_usage
71
+ top_k_variables_by__forward_selection_boxplot
72
+
73
+
74
+ 5️⃣ Hyperparameter analysis
75
+
76
+ lightgbm_hyperparameter_auc_curve_bootstrap
77
+ ```
78
+
79
+ ---
80
+
81
+ # Example Workflow
82
+
83
+ ```python
84
+ import maxwailab
85
+
86
+ # --------------------------------
87
+ # Forward Selection with Bootstrap
88
+ # --------------------------------
89
+
90
+ result_bootstrap = maxwailab.bootstrap_lightgbm_forward_selection(
91
+ df=data,
92
+ target="target",
93
+ n_bootstrap=30,
94
+ n_max_variables=15,
95
+ metric_to_optimize="auc_roc",
96
+ hyperparameters=lgb_params
97
+ )
98
+ ```
99
+
100
+ ---
101
+
102
+ ## Analyze performance stability
103
+
104
+ ```python
105
+ maxwailab.performance_forward_selection_boxplot(
106
+ result_bootstrap["auc_roc"],
107
+ "AUC"
108
+ )
109
+ ```
110
+
111
+ This visualizes how performance behaves as variables are added.
112
+
113
+ ---
114
+
115
+ ## Variable selection stability
116
+
117
+ ```python
118
+ maxwailab.variable_frequency_forward_selection(
119
+ result_bootstrap["variables"],
120
+ n_bootstraps=30
121
+ )
122
+ ```
123
+
124
+ Heatmap showing **how frequently variables appear in models of different sizes**.
125
+
126
+ ---
127
+
128
+ ## Extract best variables
129
+
130
+ ### Based on selection frequency
131
+
132
+ ```python
133
+ maxwailab.top_k_forward_selection_variables_by_frequency_usage(
134
+ result_bootstrap["variables"],
135
+ n_bootstraps=30,
136
+ k=10
137
+ )
138
+ ```
139
+
140
+ ### Based on best model performance
141
+
142
+ ```python
143
+ variables, auc = maxwailab.top_k_variables_by__forward_selection_boxplot(
144
+ result_bootstrap,
145
+ k=6,
146
+ metric="auc_roc"
147
+ )
148
+ ```
149
+
150
+ ---
151
+
152
+ # Tree-based Supervised Binning
153
+
154
+ Supervised binning using decision trees.
155
+
156
+ ```python
157
+ from maxwailab import tree_supervised_binning
158
+
159
+ tree_supervised_binning(
160
+ df=data,
161
+ feature="age",
162
+ target="target",
163
+ max_leaf_nodes=5
164
+ )
165
+ ```
166
+
167
+ ---
168
+
169
+ ## Bootstrap Binning Stability
170
+
171
+ ```python
172
+ bootstrap_tree_binning_auc_analysis(
173
+ df_train,
174
+ df_val,
175
+ feature="age",
176
+ target="target"
177
+ )
178
+ ```
179
+
180
+ Evaluates how **binning performance varies across bootstrap samples**.
181
+
182
+ ---
183
+
184
+ # Hyperparameter Sensitivity Analysis
185
+
186
+ Evaluate how model performance reacts to hyperparameter changes.
187
+
188
+ ```python
189
+ lightgbm_hyperparameter_auc_curve_bootstrap(
190
+ X_train,
191
+ y_train,
192
+ X_val,
193
+ y_val,
194
+ hyperparameters=lgb_params,
195
+ hyperparameter_name="num_leaves",
196
+ hyperparameter_values=[5,10,20,40],
197
+ n_bootstrap=50
198
+ )
199
+ ```
200
+
201
+ Bootstrap is applied **only to the training set** while keeping validation **fixed (out-of-time)**.
202
+
203
+ ---
204
+
205
+ # Example Output
206
+
207
+ The library produces:
208
+
209
+ * performance **distributions**
210
+ * **boxplots**
211
+ * **stability heatmaps**
212
+ * **hyperparameter sensitivity curves**
213
+
214
+ These diagnostics help detect:
215
+
216
+ * overfitting
217
+ * unstable features
218
+ * fragile hyperparameters
219
+
220
+ ---
221
+
222
+ # Module Structure
223
+
224
+ ```
225
+ maxwailab
226
+
227
+ ├── binning
228
+ │ ├── tree_supervised_binning
229
+ │ └── bootstrap_tree_binning_auc_analysis
230
+
231
+ ├── feature_selection
232
+ │ ├── bootstrap_lightgbm_forward_selection
233
+ │ ├── performance_forward_selection_boxplot
234
+ │ ├── variable_frequency_forward_selection
235
+ │ ├── top_k_forward_selection_variables_by_frequency_usage
236
+ │ └── top_k_variables_by__forward_selection_boxplot
237
+
238
+ └── hyperparameter_analysis
239
+ └── lightgbm_hyperparameter_auc_curve_bootstrap
240
+ ```
241
+
242
+ ---
243
+
244
+ # When to Use This Library
245
+
246
+ This library is particularly useful for:
247
+
248
+ * **credit risk models**
249
+ * **tabular ML problems**
250
+ * **high-stakes predictive modeling**
251
+ * **interpretable ML workflows**
252
+
253
+ ---
254
+
255
+ # License
256
+
257
+ MIT License
@@ -0,0 +1,32 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "maxwailab"
7
+ version = "1.0.0"
8
+ description = "Bootstrap-based model stability and supervised binning toolkit"
9
+ authors = [
10
+ { name="Max Wienandts" }
11
+ ]
12
+ readme = "README.md"
13
+ requires-python = ">=3.10"
14
+
15
+ dependencies = [
16
+ "numpy",
17
+ "pandas",
18
+ "scikit-learn",
19
+ "lightgbm",
20
+ "matplotlib",
21
+ "seaborn",
22
+ "tqdm"
23
+ ]
24
+
25
+ [tool.setuptools]
26
+ package-dir = {"" = "src"}
27
+
28
+ [tool.setuptools.packages.find]
29
+ where = ["src"]
30
+
31
+ [project.urls]
32
+ Homepage = "https://github.com/MaxWienandts/maxwailab"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,16 @@
1
+ from .binning import (
2
+ tree_supervised_binning,
3
+ bootstrap_tree_binning_auc_analysis,
4
+ )
5
+
6
+ from .feature_selection import (
7
+ bootstrap_lightgbm_forward_selection,
8
+ performance_forward_selection_boxplot,
9
+ variable_frequency_forward_selection,
10
+ top_k_forward_selection_variables_by_frequency_usage,
11
+ top_k_variables_by__forward_selection_boxplot,
12
+ )
13
+
14
+ from .hyperparameter_analysis import (
15
+ lightgbm_hyperparameter_auc_curve_bootstrap
16
+ )