panelbox 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- panelbox/__init__.py +67 -0
- panelbox/__version__.py +14 -0
- panelbox/cli/__init__.py +0 -0
- panelbox/cli/{commands}/__init__.py +0 -0
- panelbox/core/__init__.py +0 -0
- panelbox/core/base_model.py +164 -0
- panelbox/core/formula_parser.py +318 -0
- panelbox/core/panel_data.py +387 -0
- panelbox/core/results.py +366 -0
- panelbox/datasets/__init__.py +0 -0
- panelbox/datasets/{data}/__init__.py +0 -0
- panelbox/gmm/__init__.py +65 -0
- panelbox/gmm/difference_gmm.py +645 -0
- panelbox/gmm/estimator.py +562 -0
- panelbox/gmm/instruments.py +580 -0
- panelbox/gmm/results.py +550 -0
- panelbox/gmm/system_gmm.py +621 -0
- panelbox/gmm/tests.py +535 -0
- panelbox/models/__init__.py +11 -0
- panelbox/models/dynamic/__init__.py +0 -0
- panelbox/models/iv/__init__.py +0 -0
- panelbox/models/static/__init__.py +13 -0
- panelbox/models/static/fixed_effects.py +516 -0
- panelbox/models/static/pooled_ols.py +298 -0
- panelbox/models/static/random_effects.py +512 -0
- panelbox/report/__init__.py +61 -0
- panelbox/report/asset_manager.py +410 -0
- panelbox/report/css_manager.py +472 -0
- panelbox/report/exporters/__init__.py +15 -0
- panelbox/report/exporters/html_exporter.py +440 -0
- panelbox/report/exporters/latex_exporter.py +510 -0
- panelbox/report/exporters/markdown_exporter.py +446 -0
- panelbox/report/renderers/__init__.py +11 -0
- panelbox/report/renderers/static/__init__.py +0 -0
- panelbox/report/renderers/static_validation_renderer.py +341 -0
- panelbox/report/report_manager.py +502 -0
- panelbox/report/template_manager.py +337 -0
- panelbox/report/transformers/__init__.py +0 -0
- panelbox/report/transformers/static/__init__.py +0 -0
- panelbox/report/validation_transformer.py +449 -0
- panelbox/standard_errors/__init__.py +0 -0
- panelbox/templates/__init__.py +0 -0
- panelbox/templates/assets/css/base_styles.css +382 -0
- panelbox/templates/assets/css/report_components.css +747 -0
- panelbox/templates/assets/js/tab-navigation.js +161 -0
- panelbox/templates/assets/js/utils.js +276 -0
- panelbox/templates/common/footer.html +24 -0
- panelbox/templates/common/header.html +44 -0
- panelbox/templates/common/meta.html +5 -0
- panelbox/templates/validation/interactive/index.html +272 -0
- panelbox/templates/validation/interactive/partials/charts.html +58 -0
- panelbox/templates/validation/interactive/partials/methodology.html +201 -0
- panelbox/templates/validation/interactive/partials/overview.html +146 -0
- panelbox/templates/validation/interactive/partials/recommendations.html +101 -0
- panelbox/templates/validation/interactive/partials/test_results.html +231 -0
- panelbox/utils/__init__.py +0 -0
- panelbox/utils/formatting.py +172 -0
- panelbox/utils/matrix_ops.py +233 -0
- panelbox/utils/statistical.py +173 -0
- panelbox/validation/__init__.py +58 -0
- panelbox/validation/base.py +175 -0
- panelbox/validation/cointegration/__init__.py +0 -0
- panelbox/validation/cross_sectional_dependence/__init__.py +13 -0
- panelbox/validation/cross_sectional_dependence/breusch_pagan_lm.py +222 -0
- panelbox/validation/cross_sectional_dependence/frees.py +297 -0
- panelbox/validation/cross_sectional_dependence/pesaran_cd.py +188 -0
- panelbox/validation/heteroskedasticity/__init__.py +13 -0
- panelbox/validation/heteroskedasticity/breusch_pagan.py +222 -0
- panelbox/validation/heteroskedasticity/modified_wald.py +172 -0
- panelbox/validation/heteroskedasticity/white.py +208 -0
- panelbox/validation/instruments/__init__.py +0 -0
- panelbox/validation/robustness/__init__.py +0 -0
- panelbox/validation/serial_correlation/__init__.py +13 -0
- panelbox/validation/serial_correlation/baltagi_wu.py +220 -0
- panelbox/validation/serial_correlation/breusch_godfrey.py +260 -0
- panelbox/validation/serial_correlation/wooldridge_ar.py +200 -0
- panelbox/validation/specification/__init__.py +16 -0
- panelbox/validation/specification/chow.py +273 -0
- panelbox/validation/specification/hausman.py +264 -0
- panelbox/validation/specification/mundlak.py +331 -0
- panelbox/validation/specification/reset.py +273 -0
- panelbox/validation/unit_root/__init__.py +0 -0
- panelbox/validation/validation_report.py +257 -0
- panelbox/validation/validation_suite.py +401 -0
- panelbox-0.2.0.dist-info/METADATA +337 -0
- panelbox-0.2.0.dist-info/RECORD +90 -0
- panelbox-0.2.0.dist-info/WHEEL +5 -0
- panelbox-0.2.0.dist-info/entry_points.txt +2 -0
- panelbox-0.2.0.dist-info/licenses/LICENSE +21 -0
- panelbox-0.2.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Heteroskedasticity tests for panel models.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from panelbox.validation.heteroskedasticity.modified_wald import ModifiedWaldTest
|
|
6
|
+
from panelbox.validation.heteroskedasticity.breusch_pagan import BreuschPaganTest
|
|
7
|
+
from panelbox.validation.heteroskedasticity.white import WhiteTest
|
|
8
|
+
|
|
9
|
+
__all__ = [
|
|
10
|
+
'ModifiedWaldTest',
|
|
11
|
+
'BreuschPaganTest',
|
|
12
|
+
'WhiteTest',
|
|
13
|
+
]
|
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Breusch-Pagan LM test for heteroskedasticity in panel data.
|
|
3
|
+
|
|
4
|
+
References
|
|
5
|
+
----------
|
|
6
|
+
Breusch, T. S., & Pagan, A. R. (1979). A simple test for heteroscedasticity
|
|
7
|
+
and random coefficient variation. Econometrica, 47(5), 1287-1294.
|
|
8
|
+
|
|
9
|
+
Greene, W. H. (2018). Econometric Analysis (8th ed.). Pearson.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import numpy as np
|
|
13
|
+
import pandas as pd
|
|
14
|
+
from scipy import stats
|
|
15
|
+
|
|
16
|
+
from panelbox.validation.base import ValidationTest, ValidationTestResult
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class BreuschPaganTest(ValidationTest):
    """
    Breusch-Pagan LM test for heteroskedasticity.

    Tests the null hypothesis that the error variance is constant
    (homoskedasticity) against the alternative that the variance
    is a function of the regressors.

    H0: sigma²_i = sigma² (homoskedasticity)
    H1: sigma²_i = h(X_i) (heteroskedasticity)

    The test regresses squared residuals on the original regressors
    and tests if the coefficients are jointly zero using an LM statistic.

    Notes
    -----
    The test statistic is n*R² from the auxiliary regression, which
    follows a chi-squared distribution with k degrees of freedom
    under the null hypothesis, where k is the number of linearly
    independent regressors (excluding the constant).

    The auxiliary regression always contains an intercept, even when the
    model's design matrix does not, because the centered R² used by the
    LM statistic is only meaningful for a regression with a constant.

    Examples
    --------
    >>> from panelbox.models.static.pooled_ols import PooledOLS
    >>> model = PooledOLS("y ~ x1 + x2", data, "entity", "time")
    >>> results = model.fit()
    >>>
    >>> from panelbox.validation.heteroskedasticity.breusch_pagan import BreuschPaganTest
    >>> test = BreuschPaganTest(results)
    >>> result = test.run()
    >>> print(result)
    """

    def __init__(self, results: 'PanelResults'):
        """
        Initialize Breusch-Pagan test.

        Parameters
        ----------
        results : PanelResults
            Results from panel model estimation
        """
        super().__init__(results)

        # Cache the design matrix stored on the model when there is one;
        # otherwise _get_design_matrix() tries to rebuild it on demand.
        model = getattr(results, '_model', None)
        self._X = getattr(model, '_X_orig', None)

    def run(self, alpha: float = 0.05) -> ValidationTestResult:
        """
        Run Breusch-Pagan LM test for heteroskedasticity.

        Parameters
        ----------
        alpha : float, default=0.05
            Significance level

        Returns
        -------
        ValidationTestResult
            Test results

        Raises
        ------
        ValueError
            If the design matrix is not available, is empty, or does not
            have one row per residual

        Notes
        -----
        The test procedure:
        1. Estimate the original model and obtain residuals e
        2. Compute squared residuals e²
        3. Regress e² on a constant and the non-constant regressors in X
        4. Compute LM statistic = n*R² from this auxiliary regression
        5. Compare to chi-squared(k) distribution
        """
        # Flatten so that (n, 1) residual arrays behave like (n,) vectors.
        resid_sq = np.asarray(self.resid, dtype=float).ravel() ** 2
        n = resid_sq.shape[0]

        X = self._get_design_matrix()
        if X is None:
            raise ValueError(
                "Design matrix not available. "
                "Breusch-Pagan test requires access to the original regressors."
            )

        X = np.asarray(X, dtype=float)
        if X.ndim == 1:
            X = X.reshape(-1, 1)
        if n == 0 or X.shape[0] != n:
            raise ValueError(
                "Design matrix and residuals must be non-empty and have the "
                f"same number of rows (got {X.shape[0]} and {n})."
            )
        k = X.shape[1]

        # Drop every constant column and put exactly one intercept back.
        # With an intercept already in X this spans the same column space,
        # so fitted values (and R²) are unchanged; without one it makes the
        # centered R² below well defined.
        varying = [j for j in range(k) if not np.allclose(X[:, j], X[0, j])]
        X_aux = np.column_stack([np.ones(n), X[:, varying]])

        # Least squares via SVD: handles rank-deficient regressors directly
        # (np.linalg.solve on X'X only fails for *exactly* singular matrices)
        # and reports the numerical rank needed for the degrees of freedom.
        beta_aux, _, rank, _ = np.linalg.lstsq(X_aux, resid_sq, rcond=None)
        fitted_aux = X_aux @ beta_aux

        SST = np.sum((resid_sq - resid_sq.mean()) ** 2)
        SSR = np.sum((resid_sq - fitted_aux) ** 2)
        R2_aux = 1.0 - SSR / SST if SST > 0 else 0.0

        # With an intercept R² lies in [0, 1]; clip only guards rounding error.
        R2_aux = float(np.clip(R2_aux, 0.0, 1.0))

        lm_stat = float(n * R2_aux)

        # Degrees of freedom = independent regressors excluding the constant.
        # Floor at 1 so the chi-squared distribution stays defined when the
        # design matrix holds nothing but a constant (statistic is 0 then).
        df = max(int(rank) - 1, 1)

        # Survival function keeps precision for very small p-values,
        # where 1 - cdf would round to exactly 0.
        pvalue = float(stats.chi2.sf(lm_stat, df))

        metadata = {
            'R2_auxiliary': R2_aux,
            'n_obs': n,
            'n_regressors': k
        }

        result = ValidationTestResult(
            test_name="Breusch-Pagan LM Test for Heteroskedasticity",
            statistic=lm_stat,
            pvalue=pvalue,
            null_hypothesis="Homoskedasticity (constant error variance)",
            alternative_hypothesis="Heteroskedasticity (variance depends on regressors)",
            alpha=alpha,
            df=df,
            metadata=metadata
        )

        return result

    def _get_design_matrix(self) -> 'np.ndarray | None':
        """
        Get the design matrix X.

        Returns
        -------
        np.ndarray or None
            Design matrix, or None if not available
        """
        # Prefer the matrix cached at construction time.
        if self._X is not None:
            return self._X

        # Otherwise try to rebuild it from the model behind the results.
        model = getattr(self.results, '_model', None)
        if model is None:
            return None
        if not (hasattr(model, 'formula_parser') and hasattr(model, 'data')):
            return None

        try:
            _, X = model.formula_parser.build_design_matrices(
                model.data.data,
                return_type='array'
            )
        except Exception as exc:
            # Deliberately best-effort: the caller turns None into a
            # ValueError. Surface the root cause instead of hiding it.
            import warnings
            warnings.warn(
                f"Could not rebuild the design matrix from the model: {exc}",
                RuntimeWarning
            )
            return None

        return X
|
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Modified Wald test for groupwise heteroskedasticity in fixed effects models.
|
|
3
|
+
|
|
4
|
+
References
|
|
5
|
+
----------
|
|
6
|
+
Greene, W. H. (2000). Econometric Analysis (4th ed.). Prentice Hall.
|
|
7
|
+
|
|
8
|
+
Baum, C. F. (2001). Residual diagnostics for cross-section time series
|
|
9
|
+
regression models. The Stata Journal, 1(1), 101-104.
|
|
10
|
+
|
|
11
|
+
Stata command: xttest3
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import numpy as np
|
|
15
|
+
import pandas as pd
|
|
16
|
+
from scipy import stats
|
|
17
|
+
|
|
18
|
+
from panelbox.validation.base import ValidationTest, ValidationTestResult
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class ModifiedWaldTest(ValidationTest):
    """
    Modified Wald test for groupwise heteroskedasticity.

    Tests the null hypothesis that the error variance is the same across
    all cross-sectional units (entities) against the alternative that
    variances differ across groups.

    H0: sigma²_1 = sigma²_2 = ... = sigma²_N
    H1: sigma²_i ≠ sigma²_j for some i ≠ j

    This test is specifically designed for fixed effects panel models.
    Unlike likelihood-ratio alternatives it does not rely on normally
    distributed errors (Baum, 2001).

    Notes
    -----
    Following Greene (2000) and Baum (2001), for each entity i with T_i
    residuals e_it:

        sigma²_i = (1 / T_i) * sum_t e_it²
        V_i      = (1 / (T_i * (T_i - 1))) * sum_t (e_it² - sigma²_i)²
        W        = sum_i (sigma²_i - sigma²)² / V_i

    where sigma² is the pooled mean of the squared residuals. W follows a
    chi-squared distribution with N degrees of freedom under the null
    hypothesis, where N is the number of entities that contribute to the
    statistic.

    Unbalanced panels are supported. Entities with fewer than two
    observations, or whose squared residuals do not vary, have no usable
    V_i and are left out of both the statistic and the degrees of freedom.

    Examples
    --------
    >>> from panelbox.models.static.fixed_effects import FixedEffects
    >>> fe = FixedEffects("y ~ x1 + x2", data, "entity", "time")
    >>> results = fe.fit()
    >>>
    >>> from panelbox.validation.heteroskedasticity.modified_wald import ModifiedWaldTest
    >>> test = ModifiedWaldTest(results)
    >>> result = test.run()
    >>> print(result)
    """

    def __init__(self, results: 'PanelResults'):
        """
        Initialize Modified Wald test.

        Parameters
        ----------
        results : PanelResults
            Results from panel model estimation (preferably Fixed Effects)
        """
        super().__init__(results)

        # The test is still computed for other models, but warn because it
        # was derived for fixed effects residuals.
        if 'Fixed Effects' not in self.model_type:
            import warnings
            warnings.warn(
                "Modified Wald test is designed for Fixed Effects models. "
                f"Current model: {self.model_type}"
            )

    def run(self, alpha: float = 0.05) -> ValidationTestResult:
        """
        Run Modified Wald test for groupwise heteroskedasticity.

        Parameters
        ----------
        alpha : float, default=0.05
            Significance level

        Returns
        -------
        ValidationTestResult
            Test results

        Raises
        ------
        AttributeError
            If the results object has no ``entity_index`` attribute
        ValueError
            If no entity has enough observations to estimate the variance
            of its own error variance
        """
        resid_df = self._prepare_residual_data()

        entity = resid_df['entity']
        resid_sq = resid_df['resid'].astype(float) ** 2
        by_entity = resid_sq.groupby(entity)

        entity_counts = by_entity.size()
        # sigma²_i: mean squared residual within each entity.
        entity_vars = by_entity.mean()
        n_entities = len(entity_vars)

        if n_entities == 0:
            raise ValueError("No residuals available for the Modified Wald test.")

        # sigma²: pooled mean squared residual, on the same (no degrees of
        # freedom correction) footing as the per-entity estimates.
        pooled_var = float(resid_sq.mean())

        # V_i: estimated variance of sigma²_i. The denominator is masked for
        # single-observation entities so they yield NaN rather than 0/0.
        sq_dev = (resid_sq - by_entity.transform('mean')) ** 2
        denom = (entity_counts * (entity_counts - 1)).where(entity_counts > 1)
        var_of_var = sq_dev.groupby(entity).sum() / denom

        # NaN > 0 is False, so entities without a usable V_i drop out here.
        usable = var_of_var > 0
        df = int(usable.sum())
        if df == 0:
            raise ValueError(
                "Modified Wald test requires at least one entity with two or "
                "more observations and non-constant squared residuals."
            )

        contributions = (entity_vars[usable] - pooled_var) ** 2 / var_of_var[usable]
        wald_stat = float(contributions.sum())

        # Survival function keeps precision for very small p-values,
        # where 1 - cdf would round to exactly 0.
        pvalue = float(stats.chi2.sf(wald_stat, df))

        min_var = float(entity_vars.min())
        max_var = float(entity_vars.max())

        metadata = {
            'n_entities': n_entities,
            'n_entities_used': df,
            'pooled_variance': pooled_var,
            'min_entity_var': min_var,
            'max_entity_var': max_var,
            'variance_ratio': max_var / min_var if min_var > 0 else np.inf
        }

        result = ValidationTestResult(
            test_name="Modified Wald Test for Groupwise Heteroskedasticity",
            statistic=wald_stat,
            pvalue=pvalue,
            null_hypothesis="Homoskedasticity (constant variance across entities)",
            alternative_hypothesis="Groupwise heteroskedasticity present",
            alpha=alpha,
            df=df,
            metadata=metadata
        )

        return result

    def _prepare_residual_data(self) -> pd.DataFrame:
        """
        Prepare residual data with entity identifiers.

        Returns
        -------
        pd.DataFrame
            DataFrame with columns: entity, resid

        Raises
        ------
        AttributeError
            If the results object has no ``entity_index`` attribute
        """
        if not hasattr(self.results, 'entity_index'):
            raise AttributeError(
                "Results object must have 'entity_index' attribute. "
                "Please ensure your model stores this during estimation."
            )

        # Plain 1-D arrays pair residuals with entities by position, so a
        # pandas index on either input cannot trigger label alignment.
        return pd.DataFrame({
            'entity': np.asarray(self.results.entity_index).ravel(),
            'resid': np.asarray(self.resid).ravel()
        })
|
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
"""
|
|
2
|
+
White test for heteroskedasticity in panel data.
|
|
3
|
+
|
|
4
|
+
References
|
|
5
|
+
----------
|
|
6
|
+
White, H. (1980). A heteroskedasticity-consistent covariance matrix estimator
|
|
7
|
+
and a direct test for heteroskedasticity. Econometrica, 48(4), 817-838.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import numpy as np
|
|
11
|
+
import pandas as pd
|
|
12
|
+
from scipy import stats
|
|
13
|
+
from itertools import combinations_with_replacement
|
|
14
|
+
|
|
15
|
+
from panelbox.validation.base import ValidationTest, ValidationTestResult
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class WhiteTest(ValidationTest):
    """
    White test for heteroskedasticity.

    Tests the null hypothesis of homoskedasticity against a general
    alternative of heteroskedasticity. Unlike Breusch-Pagan, this test
    does not assume a specific functional form for the heteroskedasticity.

    H0: sigma²_i = sigma² (homoskedasticity)
    H1: sigma²_i depends on regressors in a general way

    The test regresses squared residuals on the original regressors,
    their squares, and cross-products (interactions).

    Notes
    -----
    The test statistic is n*R² from the auxiliary regression, which
    follows a chi-squared distribution under the null hypothesis. The
    degrees of freedom equal the rank of the auxiliary regressor matrix
    minus one (for the constant), so redundant terms such as the square
    of a dummy variable are not counted.

    For models with many regressors, the test can have low power due
    to the large number of terms in the auxiliary regression.

    Examples
    --------
    >>> from panelbox.models.static.pooled_ols import PooledOLS
    >>> model = PooledOLS("y ~ x1 + x2", data, "entity", "time")
    >>> results = model.fit()
    >>>
    >>> from panelbox.validation.heteroskedasticity.white import WhiteTest
    >>> test = WhiteTest(results)
    >>> result = test.run()
    >>> print(result)
    """

    def __init__(self, results: 'PanelResults'):
        """
        Initialize White test.

        Parameters
        ----------
        results : PanelResults
            Results from panel model estimation
        """
        super().__init__(results)

        # Cache the design matrix stored on the model when there is one;
        # otherwise _get_design_matrix() tries to rebuild it on demand.
        model = getattr(results, '_model', None)
        self._X = getattr(model, '_X_orig', None)

    def run(self, alpha: float = 0.05, cross_terms: bool = True) -> ValidationTestResult:
        """
        Run White test for heteroskedasticity.

        Parameters
        ----------
        alpha : float, default=0.05
            Significance level
        cross_terms : bool, default=True
            If True, include cross-product terms in auxiliary regression
            If False, only include squared terms (less computationally intensive)

        Returns
        -------
        ValidationTestResult
            Test results

        Raises
        ------
        ValueError
            If the design matrix is not available, is empty, or does not
            have one row per residual
        """
        # Flatten so that (n, 1) residual arrays behave like (n,) vectors.
        resid_sq = np.asarray(self.resid, dtype=float).ravel() ** 2
        n = resid_sq.shape[0]

        X = self._get_design_matrix()
        if X is None:
            raise ValueError(
                "Design matrix not available. "
                "White test requires access to the original regressors."
            )

        X = np.asarray(X, dtype=float)
        if X.ndim == 1:
            X = X.reshape(-1, 1)
        if n == 0 or X.shape[0] != n:
            raise ValueError(
                "Design matrix and residuals must be non-empty and have the "
                f"same number of rows (got {X.shape[0]} and {n})."
            )
        k_orig = X.shape[1]

        # Drop constant columns wherever they sit (not only a leading column
        # of ones): their squares and products are constants too and would
        # merely duplicate the intercept added below.
        varying = [j for j in range(k_orig) if not np.allclose(X[:, j], X[0, j])]
        X_vars = X[:, varying]
        k_vars = X_vars.shape[1]

        # Auxiliary regressors: intercept, levels, squares, cross-products.
        aux_cols = [np.ones(n)]
        aux_cols.extend(X_vars[:, j] for j in range(k_vars))
        aux_cols.extend(X_vars[:, j] ** 2 for j in range(k_vars))
        if cross_terms:
            # Each unordered pair i < j exactly once.
            for i in range(k_vars):
                for j in range(i + 1, k_vars):
                    aux_cols.append(X_vars[:, i] * X_vars[:, j])

        X_aux = np.column_stack(aux_cols)

        # Least squares via SVD: squared dummies and similar terms make the
        # auxiliary matrix rank deficient, which lstsq handles directly and
        # for which it reports the numerical rank.
        beta_aux, _, rank, _ = np.linalg.lstsq(X_aux, resid_sq, rcond=None)
        fitted_aux = X_aux @ beta_aux

        SST = np.sum((resid_sq - resid_sq.mean()) ** 2)
        SSR = np.sum((resid_sq - fitted_aux) ** 2)
        R2_aux = 1.0 - SSR / SST if SST > 0 else 0.0

        # With an intercept R² lies in [0, 1]; clip only guards rounding error
        # so the LM statistic can never come out negative.
        R2_aux = float(np.clip(R2_aux, 0.0, 1.0))

        lm_stat = float(n * R2_aux)

        # Degrees of freedom = independent auxiliary terms excluding the
        # constant. Floor at 1 so the chi-squared distribution stays defined
        # when the design matrix holds nothing but a constant.
        df = max(int(rank) - 1, 1)

        # Survival function keeps precision for very small p-values,
        # where 1 - cdf would round to exactly 0.
        pvalue = float(stats.chi2.sf(lm_stat, df))

        metadata = {
            'R2_auxiliary': R2_aux,
            'n_obs': n,
            'n_original_regressors': k_orig,
            'n_auxiliary_terms': X_aux.shape[1],
            'includes_cross_terms': cross_terms
        }

        result = ValidationTestResult(
            test_name="White Test for Heteroskedasticity",
            statistic=lm_stat,
            pvalue=pvalue,
            null_hypothesis="Homoskedasticity (constant error variance)",
            alternative_hypothesis="General heteroskedasticity",
            alpha=alpha,
            df=df,
            metadata=metadata
        )

        return result

    def _get_design_matrix(self) -> 'np.ndarray | None':
        """
        Get the design matrix X.

        Returns
        -------
        np.ndarray or None
            Design matrix, or None if not available
        """
        # Prefer the matrix cached at construction time.
        if self._X is not None:
            return self._X

        # Otherwise try to rebuild it from the model behind the results.
        model = getattr(self.results, '_model', None)
        if model is None:
            return None
        if not (hasattr(model, 'formula_parser') and hasattr(model, 'data')):
            return None

        try:
            _, X = model.formula_parser.build_design_matrices(
                model.data.data,
                return_type='array'
            )
        except Exception as exc:
            # Deliberately best-effort: the caller turns None into a
            # ValueError. Surface the root cause instead of hiding it.
            import warnings
            warnings.warn(
                f"Could not rebuild the design matrix from the model: {exc}",
                RuntimeWarning
            )
            return None

        return X
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Serial correlation tests for panel models.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from panelbox.validation.serial_correlation.wooldridge_ar import WooldridgeARTest
|
|
6
|
+
from panelbox.validation.serial_correlation.breusch_godfrey import BreuschGodfreyTest
|
|
7
|
+
from panelbox.validation.serial_correlation.baltagi_wu import BaltagiWuTest
|
|
8
|
+
|
|
9
|
+
__all__ = [
|
|
10
|
+
'WooldridgeARTest',
|
|
11
|
+
'BreuschGodfreyTest',
|
|
12
|
+
'BaltagiWuTest',
|
|
13
|
+
]
|