lambda-guard-boosting 0.2.3__tar.gz → 0.2.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {lambda_guard_boosting-0.2.3 → lambda_guard_boosting-0.2.4}/PKG-INFO +16 -6
- {lambda_guard_boosting-0.2.3 → lambda_guard_boosting-0.2.4}/README.md +15 -5
- {lambda_guard_boosting-0.2.3 → lambda_guard_boosting-0.2.4}/lambda_guard_boosting.egg-info/PKG-INFO +16 -6
- {lambda_guard_boosting-0.2.3 → lambda_guard_boosting-0.2.4}/lambdaguard/lambdaguard.py +3 -3
- {lambda_guard_boosting-0.2.3 → lambda_guard_boosting-0.2.4}/lambdaguard/ofi.py +106 -2
- {lambda_guard_boosting-0.2.3 → lambda_guard_boosting-0.2.4}/pyproject.toml +1 -1
- {lambda_guard_boosting-0.2.3 → lambda_guard_boosting-0.2.4}/LICENSE.md +0 -0
- {lambda_guard_boosting-0.2.3 → lambda_guard_boosting-0.2.4}/lambda_guard_boosting.egg-info/SOURCES.txt +0 -0
- {lambda_guard_boosting-0.2.3 → lambda_guard_boosting-0.2.4}/lambda_guard_boosting.egg-info/dependency_links.txt +0 -0
- {lambda_guard_boosting-0.2.3 → lambda_guard_boosting-0.2.4}/lambda_guard_boosting.egg-info/requires.txt +0 -0
- {lambda_guard_boosting-0.2.3 → lambda_guard_boosting-0.2.4}/lambda_guard_boosting.egg-info/top_level.txt +0 -0
- {lambda_guard_boosting-0.2.3 → lambda_guard_boosting-0.2.4}/lambdaguard/__init__.py +0 -0
- {lambda_guard_boosting-0.2.3 → lambda_guard_boosting-0.2.4}/lambdaguard/cusum.py +0 -0
- {lambda_guard_boosting-0.2.3 → lambda_guard_boosting-0.2.4}/setup.cfg +0 -0
- {lambda_guard_boosting-0.2.3 → lambda_guard_boosting-0.2.4}/tests/test_cusum.py +0 -0
- {lambda_guard_boosting-0.2.3 → lambda_guard_boosting-0.2.4}/tests/test_ofi.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: lambda-guard-boosting
|
|
3
|
-
Version: 0.2.3
|
|
3
|
+
Version: 0.2.4
|
|
4
4
|
Summary: Overfitting detection for Gradient Boosting models using λ-Guard methodology.
|
|
5
5
|
Author-email: "Fabrizio Di Sciorio, PhD" <fabriziodisciorio91@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -194,17 +194,18 @@ Install via GitHub:
|
|
|
194
194
|
pip install git+https://github.com/faberBI/lambdaguard.git
|
|
195
195
|
|
|
196
196
|
from sklearn.ensemble import GradientBoostingRegressor
|
|
197
|
-
from lambdaguard.ofi import
|
|
197
|
+
from lambdaguard.ofi import generalization_index, instability_index
|
|
198
198
|
from lambdaguard.lambdaguard import lambda_guard_test, interpret
|
|
199
|
-
from lambdaguard.cusum import
|
|
199
|
+
from lambdaguard.cusum import lambda_detect
|
|
200
200
|
import pandas as pd
|
|
201
201
|
|
|
202
202
|
# Fit a model
|
|
203
203
|
model = GradientBoostingRegressor(n_estimators=50, max_depth=3)
|
|
204
204
|
model.fit(X_train, y_train)
|
|
205
205
|
|
|
206
|
-
#
|
|
207
|
-
|
|
206
|
+
# Generalization index
|
|
207
|
+
GI, A, C = generalization_index(model, X_train, y_train)
|
|
208
|
+
print("Generalization index: ", GI)
|
|
208
209
|
|
|
209
210
|
# Lambda-guard test
|
|
210
211
|
lg_res = lambda_guard_test(model, X_train)
|
|
@@ -215,7 +216,16 @@ df = pd.DataFrame([
|
|
|
215
216
|
{"model": "GBR", "n_estimators": 50, "max_depth": 3, "A": 0.8, "OFI_norm": 0.2},
|
|
216
217
|
{"model": "GBR", "n_estimators": 100, "max_depth": 5, "A": 0.85, "OFI_norm": 0.3},
|
|
217
218
|
])
|
|
218
|
-
cusum_res =
|
|
219
|
+
cusum_res = lambda_detect(
|
|
220
|
+
df,
|
|
221
|
+
model_name,
|
|
222
|
+
complexity_metric="combined",
|
|
223
|
+
lambda_col="OFI_norm",
|
|
224
|
+
alignment_col="A",
|
|
225
|
+
smooth_window=3,
|
|
226
|
+
cusum_threshold_factor=1.5,
|
|
227
|
+
baseline_points=10
|
|
228
|
+
)
|
|
219
229
|
|
|
220
230
|
```
|
|
221
231
|
|
|
@@ -166,17 +166,18 @@ Install via GitHub:
|
|
|
166
166
|
pip install git+https://github.com/faberBI/lambdaguard.git
|
|
167
167
|
|
|
168
168
|
from sklearn.ensemble import GradientBoostingRegressor
|
|
169
|
-
from lambdaguard.ofi import
|
|
169
|
+
from lambdaguard.ofi import generalization_index, instability_index
|
|
170
170
|
from lambdaguard.lambdaguard import lambda_guard_test, interpret
|
|
171
|
-
from lambdaguard.cusum import
|
|
171
|
+
from lambdaguard.cusum import lambda_detect
|
|
172
172
|
import pandas as pd
|
|
173
173
|
|
|
174
174
|
# Fit a model
|
|
175
175
|
model = GradientBoostingRegressor(n_estimators=50, max_depth=3)
|
|
176
176
|
model.fit(X_train, y_train)
|
|
177
177
|
|
|
178
|
-
#
|
|
179
|
-
|
|
178
|
+
# Generalization index
|
|
179
|
+
GI, A, C = generalization_index(model, X_train, y_train)
|
|
180
|
+
print("Generalization index: ", GI)
|
|
180
181
|
|
|
181
182
|
# Lambda-guard test
|
|
182
183
|
lg_res = lambda_guard_test(model, X_train)
|
|
@@ -187,7 +188,16 @@ df = pd.DataFrame([
|
|
|
187
188
|
{"model": "GBR", "n_estimators": 50, "max_depth": 3, "A": 0.8, "OFI_norm": 0.2},
|
|
188
189
|
{"model": "GBR", "n_estimators": 100, "max_depth": 5, "A": 0.85, "OFI_norm": 0.3},
|
|
189
190
|
])
|
|
190
|
-
cusum_res =
|
|
191
|
+
cusum_res = lambda_detect(
|
|
192
|
+
df,
|
|
193
|
+
model_name,
|
|
194
|
+
complexity_metric="combined",
|
|
195
|
+
lambda_col="OFI_norm",
|
|
196
|
+
alignment_col="A",
|
|
197
|
+
smooth_window=3,
|
|
198
|
+
cusum_threshold_factor=1.5,
|
|
199
|
+
baseline_points=10
|
|
200
|
+
)
|
|
191
201
|
|
|
192
202
|
```
|
|
193
203
|
|
{lambda_guard_boosting-0.2.3 → lambda_guard_boosting-0.2.4}/lambda_guard_boosting.egg-info/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: lambda-guard-boosting
|
|
3
|
-
Version: 0.2.3
|
|
3
|
+
Version: 0.2.4
|
|
4
4
|
Summary: Overfitting detection for Gradient Boosting models using λ-Guard methodology.
|
|
5
5
|
Author-email: "Fabrizio Di Sciorio, PhD" <fabriziodisciorio91@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -194,17 +194,18 @@ Install via GitHub:
|
|
|
194
194
|
pip install git+https://github.com/faberBI/lambdaguard.git
|
|
195
195
|
|
|
196
196
|
from sklearn.ensemble import GradientBoostingRegressor
|
|
197
|
-
from lambdaguard.ofi import
|
|
197
|
+
from lambdaguard.ofi import generalization_index, instability_index
|
|
198
198
|
from lambdaguard.lambdaguard import lambda_guard_test, interpret
|
|
199
|
-
from lambdaguard.cusum import
|
|
199
|
+
from lambdaguard.cusum import lambda_detect
|
|
200
200
|
import pandas as pd
|
|
201
201
|
|
|
202
202
|
# Fit a model
|
|
203
203
|
model = GradientBoostingRegressor(n_estimators=50, max_depth=3)
|
|
204
204
|
model.fit(X_train, y_train)
|
|
205
205
|
|
|
206
|
-
#
|
|
207
|
-
|
|
206
|
+
# Generalization index
|
|
207
|
+
GI, A, C = generalization_index(model, X_train, y_train)
|
|
208
|
+
print("Generalization index: ", GI)
|
|
208
209
|
|
|
209
210
|
# Lambda-guard test
|
|
210
211
|
lg_res = lambda_guard_test(model, X_train)
|
|
@@ -215,7 +216,16 @@ df = pd.DataFrame([
|
|
|
215
216
|
{"model": "GBR", "n_estimators": 50, "max_depth": 3, "A": 0.8, "OFI_norm": 0.2},
|
|
216
217
|
{"model": "GBR", "n_estimators": 100, "max_depth": 5, "A": 0.85, "OFI_norm": 0.3},
|
|
217
218
|
])
|
|
218
|
-
cusum_res =
|
|
219
|
+
cusum_res = lambda_detect(
|
|
220
|
+
df,
|
|
221
|
+
model_name,
|
|
222
|
+
complexity_metric="combined",
|
|
223
|
+
lambda_col="OFI_norm",
|
|
224
|
+
alignment_col="A",
|
|
225
|
+
smooth_window=3,
|
|
226
|
+
cusum_threshold_factor=1.5,
|
|
227
|
+
baseline_points=10
|
|
228
|
+
)
|
|
219
229
|
|
|
220
230
|
```
|
|
221
231
|
|
|
@@ -50,9 +50,9 @@ def lambda_guard_test(model, X, B=300, alpha=0.05, plot=True):
|
|
|
50
50
|
|
|
51
51
|
def interpret(res, alpha=0.05):
    """Turn a lambda-guard test result into a short human-readable verdict.

    Args:
        res: Mapping that provides ``"reject_H0"`` and, when the null
            hypothesis is rejected, ``"p_df_ratio"`` (plus ``"p_peak_ratio"``
            when the df-ratio p-value is significant).
        alpha: Significance level used to decide which p-values count as
            significant. Defaults to 0.05, the value that was previously
            hard-coded. NOTE(review): presumably this should match the
            ``alpha`` passed to ``lambda_guard_test`` -- confirm with caller.

    Returns:
        One of the four verdict strings below.
    """
    if not res["reject_H0"]:
        return "✔ STABLE REGIME"

    # Keys are read lazily, in the same order as before, so a result that
    # lacks "p_peak_ratio" only fails when that value is actually needed.
    if res["p_df_ratio"] < alpha:
        if res["p_peak_ratio"] < alpha:
            return "✖ OVERFITTING"
        return "✖ HIGH COMPLEXITY"

    return "✖ (LEVERAGE SPIKES)"
|
|
@@ -7,6 +7,7 @@ import pandas as pd
|
|
|
7
7
|
import seaborn as sns
|
|
8
8
|
import matplotlib.pyplot as plt
|
|
9
9
|
from itertools import product
|
|
10
|
+
import statsmodels.api as sm
|
|
10
11
|
|
|
11
12
|
from sklearn.model_selection import train_test_split
|
|
12
13
|
from sklearn.metrics import mean_squared_error
|
|
@@ -21,10 +22,113 @@ from catboost import CatBoostRegressor
|
|
|
21
22
|
# GENERALIZATION COMPONENTS
|
|
22
23
|
# -----------------------------
|
|
23
24
|
def _collect_leaves_and_predictions(model, X):
    """Fetch per-tree leaf assignments and predictions from a boosting model.

    Backends are probed in a fixed order: native XGBoost Booster, XGBoost
    sklearn wrapper, LightGBM, CatBoost, sklearn GradientBoosting.

    Returns:
        ``(leaf_matrix, preds, native_error)``. ``leaf_matrix`` is ``None``
        when no backend matched. ``native_error`` is the exception raised by
        the best-effort native XGBoost attempt, if any, so the caller can
        report it instead of losing it.
    """
    model_type = str(type(model)).lower()
    leaf_matrix = None
    preds = None
    native_error = None

    # Native XGBoost Booster: has predict() but, unlike the sklearn wrapper,
    # no get_booster().
    if (
        hasattr(model, "predict")
        and not hasattr(model, "get_booster")
        and "xgboost" in model_type
    ):
        try:
            import xgboost as xgb

            dmatrix = xgb.DMatrix(X)
            leaf_matrix = model.predict(dmatrix, pred_leaf=True)
            preds = model.predict(dmatrix)
        except Exception as exc:
            # Best effort: fall through to the other backends, but keep the
            # cause. A bare ``except`` would also swallow KeyboardInterrupt.
            native_error = exc

    # XGBoost sklearn API
    if leaf_matrix is None and hasattr(model, "get_booster"):
        leaf_matrix = model.apply(X)
        preds = model.predict(X)

    # LightGBM
    if leaf_matrix is None and "lightgbm" in model_type:
        leaf_matrix = model.predict(X, pred_leaf=True)
        preds = model.predict(X)

    # CatBoost
    if leaf_matrix is None and "catboost" in model_type:
        leaf_matrix = model.calc_leaf_indexes(X)
        preds = model.predict(X)

    # sklearn GradientBoosting: one fitted tree per entry of estimators_
    if leaf_matrix is None and hasattr(model, "estimators_"):
        leaf_matrix = np.column_stack(
            [est.apply(X) for est in model.estimators_.ravel()]
        )
        preds = model.predict(X)

    return leaf_matrix, preds, native_error


def generalization_index(model, X, y):
    """Compute the Generalization Index (GI) of a gradient-boosting model.

    Supported models: XGBoost (native Booster and sklearn API), LightGBM,
    CatBoost and sklearn GradientBoosting.

    Args:
        model: Fitted boosting model.
        X: Feature matrix passed to the model's leaf/predict methods.
        y: Target values; flattened to 1-D before correlating.

    Returns:
        ``(GI, A, C)`` where

        * ``C`` (capacity) is the variance of the one-hot leaf-indicator
          matrix (one column per distinct leaf reached in each tree),
        * ``A`` (alignment) is the Pearson correlation between predictions
          and ``y``, or 0 when either side is constant,
        * ``GI`` is ``A / C``, or 0 when ``C`` is 0.

        ``(0, 0, 0)`` is returned when no leaf is reached at all.

    Raises:
        ValueError: If no supported backend could produce leaf indices. A
            failure of the native XGBoost attempt is chained as the cause.
    """
    leaf_matrix, preds, native_error = _collect_leaves_and_predictions(model, X)
    if leaf_matrix is None:
        raise ValueError("Modello non supportato per GI computation") from native_error

    # Guarantee a 2-D (samples x trees) layout.
    leaf_matrix = np.asarray(leaf_matrix)
    if leaf_matrix.ndim == 1:
        leaf_matrix = leaf_matrix.reshape(-1, 1)

    n_trees = leaf_matrix.shape[1]
    total_leaves = sum(
        np.unique(leaf_matrix[:, t]).size for t in range(n_trees)
    )
    if total_leaves == 0:
        return 0, 0, 0

    # Capacity. The indicator matrix Z has one column per (tree, leaf) pair,
    # and every sample falls in exactly one leaf of each tree. The fraction of
    # ones in Z is therefore n_trees / total_leaves, and the variance of a 0/1
    # array with mean p is p * (1 - p). This equals np.var(Z) without
    # allocating the dense samples x total_leaves matrix.
    fill = n_trees / total_leaves
    C = fill * (1.0 - fill)

    # Alignment.
    if preds is None:
        preds = model.predict(X)
    preds_flat = np.asarray(preds, dtype=float).ravel()
    y_flat = np.asarray(y, dtype=float).ravel()
    # The correlation is undefined (NaN) when either side is constant.
    if np.std(preds_flat) > 0 and np.std(y_flat) > 0:
        A = np.corrcoef(preds_flat, y_flat)[0, 1]
    else:
        A = 0

    GI = A / C if C > 0 else 0

    return GI, A, C
|
|
29
133
|
|
|
30
134
|
def instability_index(model, X, noise_std=1e-3, seed=42):
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "lambda-guard-boosting"
|
|
7
|
-
version = "0.2.3"
|
|
7
|
+
version = "0.2.4"
|
|
8
8
|
description = "Overfitting detection for Gradient Boosting models using λ-Guard methodology."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = {text = "MIT"}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|