panelbox 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- panelbox/__init__.py +67 -0
- panelbox/__version__.py +14 -0
- panelbox/cli/__init__.py +0 -0
- panelbox/cli/{commands}/__init__.py +0 -0
- panelbox/core/__init__.py +0 -0
- panelbox/core/base_model.py +164 -0
- panelbox/core/formula_parser.py +318 -0
- panelbox/core/panel_data.py +387 -0
- panelbox/core/results.py +366 -0
- panelbox/datasets/__init__.py +0 -0
- panelbox/datasets/{data}/__init__.py +0 -0
- panelbox/gmm/__init__.py +65 -0
- panelbox/gmm/difference_gmm.py +645 -0
- panelbox/gmm/estimator.py +562 -0
- panelbox/gmm/instruments.py +580 -0
- panelbox/gmm/results.py +550 -0
- panelbox/gmm/system_gmm.py +621 -0
- panelbox/gmm/tests.py +535 -0
- panelbox/models/__init__.py +11 -0
- panelbox/models/dynamic/__init__.py +0 -0
- panelbox/models/iv/__init__.py +0 -0
- panelbox/models/static/__init__.py +13 -0
- panelbox/models/static/fixed_effects.py +516 -0
- panelbox/models/static/pooled_ols.py +298 -0
- panelbox/models/static/random_effects.py +512 -0
- panelbox/report/__init__.py +61 -0
- panelbox/report/asset_manager.py +410 -0
- panelbox/report/css_manager.py +472 -0
- panelbox/report/exporters/__init__.py +15 -0
- panelbox/report/exporters/html_exporter.py +440 -0
- panelbox/report/exporters/latex_exporter.py +510 -0
- panelbox/report/exporters/markdown_exporter.py +446 -0
- panelbox/report/renderers/__init__.py +11 -0
- panelbox/report/renderers/static/__init__.py +0 -0
- panelbox/report/renderers/static_validation_renderer.py +341 -0
- panelbox/report/report_manager.py +502 -0
- panelbox/report/template_manager.py +337 -0
- panelbox/report/transformers/__init__.py +0 -0
- panelbox/report/transformers/static/__init__.py +0 -0
- panelbox/report/validation_transformer.py +449 -0
- panelbox/standard_errors/__init__.py +0 -0
- panelbox/templates/__init__.py +0 -0
- panelbox/templates/assets/css/base_styles.css +382 -0
- panelbox/templates/assets/css/report_components.css +747 -0
- panelbox/templates/assets/js/tab-navigation.js +161 -0
- panelbox/templates/assets/js/utils.js +276 -0
- panelbox/templates/common/footer.html +24 -0
- panelbox/templates/common/header.html +44 -0
- panelbox/templates/common/meta.html +5 -0
- panelbox/templates/validation/interactive/index.html +272 -0
- panelbox/templates/validation/interactive/partials/charts.html +58 -0
- panelbox/templates/validation/interactive/partials/methodology.html +201 -0
- panelbox/templates/validation/interactive/partials/overview.html +146 -0
- panelbox/templates/validation/interactive/partials/recommendations.html +101 -0
- panelbox/templates/validation/interactive/partials/test_results.html +231 -0
- panelbox/utils/__init__.py +0 -0
- panelbox/utils/formatting.py +172 -0
- panelbox/utils/matrix_ops.py +233 -0
- panelbox/utils/statistical.py +173 -0
- panelbox/validation/__init__.py +58 -0
- panelbox/validation/base.py +175 -0
- panelbox/validation/cointegration/__init__.py +0 -0
- panelbox/validation/cross_sectional_dependence/__init__.py +13 -0
- panelbox/validation/cross_sectional_dependence/breusch_pagan_lm.py +222 -0
- panelbox/validation/cross_sectional_dependence/frees.py +297 -0
- panelbox/validation/cross_sectional_dependence/pesaran_cd.py +188 -0
- panelbox/validation/heteroskedasticity/__init__.py +13 -0
- panelbox/validation/heteroskedasticity/breusch_pagan.py +222 -0
- panelbox/validation/heteroskedasticity/modified_wald.py +172 -0
- panelbox/validation/heteroskedasticity/white.py +208 -0
- panelbox/validation/instruments/__init__.py +0 -0
- panelbox/validation/robustness/__init__.py +0 -0
- panelbox/validation/serial_correlation/__init__.py +13 -0
- panelbox/validation/serial_correlation/baltagi_wu.py +220 -0
- panelbox/validation/serial_correlation/breusch_godfrey.py +260 -0
- panelbox/validation/serial_correlation/wooldridge_ar.py +200 -0
- panelbox/validation/specification/__init__.py +16 -0
- panelbox/validation/specification/chow.py +273 -0
- panelbox/validation/specification/hausman.py +264 -0
- panelbox/validation/specification/mundlak.py +331 -0
- panelbox/validation/specification/reset.py +273 -0
- panelbox/validation/unit_root/__init__.py +0 -0
- panelbox/validation/validation_report.py +257 -0
- panelbox/validation/validation_suite.py +401 -0
- panelbox-0.2.0.dist-info/METADATA +337 -0
- panelbox-0.2.0.dist-info/RECORD +90 -0
- panelbox-0.2.0.dist-info/WHEEL +5 -0
- panelbox-0.2.0.dist-info/entry_points.txt +2 -0
- panelbox-0.2.0.dist-info/licenses/LICENSE +21 -0
- panelbox-0.2.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,273 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Chow test for structural break in panel data models.
|
|
3
|
+
|
|
4
|
+
References
|
|
5
|
+
----------
|
|
6
|
+
Chow, G. C. (1960). Tests of Equality Between Sets of Coefficients in
|
|
7
|
+
Two Linear Regressions. Econometrica, 28(3), 591-605.
|
|
8
|
+
|
|
9
|
+
Baltagi, B. H. (2013). Econometric Analysis of Panel Data (5th ed.).
|
|
10
|
+
Wiley, Chapter 4.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import numpy as np
|
|
14
|
+
import pandas as pd
|
|
15
|
+
from scipy import stats
|
|
16
|
+
|
|
17
|
+
from panelbox.validation.base import ValidationTest, ValidationTestResult
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class ChowTest(ValidationTest):
    """
    Chow test for structural break in panel models.

    Tests the null hypothesis of parameter stability (no structural break)
    against the alternative that parameters differ across subperiods.

    H0: beta_1 = beta_2 (parameters are stable)
    H1: beta_1 ≠ beta_2 (structural break exists)

    The test compares the fit of:
    - Unrestricted model: separate parameters for each subperiod
    - Restricted model: same parameters for all periods

    Notes
    -----
    The test statistic is:
    F = [(SSR_r - SSR_u) / k] / [SSR_u / (N - 2k)]

    where:
    - SSR_r = residual sum of squares (restricted model)
    - SSR_u = residual sum of squares (unrestricted model)
    - k = number of parameters (regressors plus one for the intercept)
    - N = total number of observations

    All three regressions are fitted with PooledOLS. The statistic is the
    classical F ratio built from residual sums of squares only; the standard
    errors of the fitted models do not enter the computation.

    Examples
    --------
    >>> from panelbox.models.static.pooled_ols import PooledOLS
    >>> model = PooledOLS("y ~ x1 + x2", data, "entity", "time")
    >>> results = model.fit()
    >>>
    >>> from panelbox.validation.specification.chow import ChowTest
    >>> test = ChowTest(results)
    >>> # Test for break at time period 5
    >>> result = test.run(break_point=5)
    >>> print(result)
    """

    def __init__(self, results: 'PanelResults'):
        """
        Initialize Chow test.

        Parameters
        ----------
        results : PanelResults
            Results from panel model estimation
        """
        super().__init__(results)

    def run(self, break_point=None, alpha: float = 0.05) -> ValidationTestResult:
        """
        Run Chow test for structural break.

        Parameters
        ----------
        break_point : int or float, optional
            Time period at which to test for structural break.
            If None, uses the period at index ``n_periods // 2`` of the
            sorted unique time periods.
            Can be specified as:
            - Integer: exact time period (must exist in the data)
            - Float between 0 and 1: fraction of sample (e.g., 0.5 for midpoint)
        alpha : float, default=0.05
            Significance level

        Returns
        -------
        ValidationTestResult
            Test results. ``metadata`` carries the break point, subperiod
            sizes, the residual sums of squares and the subperiod
            coefficients.

        Raises
        ------
        ValueError
            If break_point is invalid, data/formula are not available, a
            subperiod has fewer than 2*k observations, any of the three
            regressions fails, or the unrestricted fit is perfect.

        Notes
        -----
        The test requires at least 2*k observations in each subperiod,
        where k is the number of parameters.
        """
        data, formula, entity_col, time_col, var_names = self._get_data_full()

        if data is None or formula is None:
            raise ValueError(
                "Data and formula required for Chow test. "
                "Ensure the model was estimated with a formula."
            )

        # Unique time periods, sorted so that an index maps to a position
        # in time.
        time_periods = sorted(data[time_col].unique())
        n_periods = len(time_periods)

        break_time, break_idx = self._resolve_break(break_point, time_periods)

        # Split with boolean masks instead of writing helper columns into
        # the frame: adding 'period_1'/'period_2' columns would overwrite
        # user columns of the same name that the formula may reference.
        # Two explicit comparisons are used (rather than one mask and its
        # negation) so a row is only counted where the comparison holds.
        in_period1 = data[time_col] < break_time
        in_period2 = data[time_col] >= break_time

        n1 = int(in_period1.sum())
        n2 = int(in_period2.sum())
        k = len(var_names) + 1  # +1 for intercept

        if n1 < 2 * k or n2 < 2 * k:
            raise ValueError(
                f"Insufficient observations in subperiods. "
                f"Need at least {2*k} in each, got n1={n1}, n2={n2}"
            )

        # Restricted model: one coefficient vector for the whole sample.
        _, ssr_restricted = self._fit_pooled(
            formula, data, entity_col, time_col, "restricted model"
        )

        # Unrestricted model: separate regressions before / from the break.
        results_1, ssr_1 = self._fit_pooled(
            formula, data[in_period1].copy(), entity_col, time_col,
            "unrestricted model (period 1)"
        )
        results_2, ssr_2 = self._fit_pooled(
            formula, data[in_period2].copy(), entity_col, time_col,
            "unrestricted model (period 2)"
        )
        ssr_unrestricted = ssr_1 + ssr_2

        # Chow F-statistic
        # F = [(SSR_r - SSR_u) / k] / [SSR_u / (N - 2k)]
        # NOTE(review): N counts every row of the data; this assumes
        # PooledOLS uses all rows (drops none) — confirm.
        N = len(data)
        df_num = k
        df_denom = N - 2 * k  # > 0 because n1 and n2 are each >= 2k

        numerator = (ssr_restricted - ssr_unrestricted) / df_num
        denominator = ssr_unrestricted / df_denom

        if denominator == 0:
            raise ValueError("Denominator is zero. Perfect fit in subperiods.")

        f_stat = numerator / denominator

        # Survival function instead of 1 - cdf: avoids cancellation when the
        # p-value is very small.
        pvalue = float(stats.f.sf(f_stat, df_num, df_denom))

        metadata = {
            'break_point': break_time,
            'break_index': break_idx,
            'n_periods_total': n_periods,
            'n_obs_period1': n1,
            'n_obs_period2': n2,
            'n_obs_total': N,
            'ssr_restricted': float(ssr_restricted),
            'ssr_unrestricted': float(ssr_unrestricted),
            'ssr_period1': float(ssr_1),
            'ssr_period2': float(ssr_2),
            'k_parameters': k,
            'coefficients_period1': results_1.params.to_dict(),
            'coefficients_period2': results_2.params.to_dict()
        }

        return ValidationTestResult(
            test_name="Chow Test for Structural Break",
            statistic=f_stat,
            pvalue=pvalue,
            null_hypothesis="No structural break (parameters stable)",
            alternative_hypothesis=f"Structural break at t={break_time}",
            alpha=alpha,
            df=(df_num, df_denom),
            metadata=metadata
        )

    @staticmethod
    def _resolve_break(break_point, time_periods):
        """Translate ``break_point`` into ``(break_time, break_idx)``.

        ``time_periods`` is the sorted list of unique time values. Raises
        ValueError for an empty list, an unknown integer period, or an
        argument that is neither None, an int, nor a float in (0, 1).
        """
        n_periods = len(time_periods)
        if n_periods == 0:
            # Without this guard the indexing below fails with IndexError.
            raise ValueError("No time periods available in data")

        if break_point is None:
            # Default: middle position of the sorted periods
            break_idx = n_periods // 2
            return time_periods[break_idx], break_idx

        if isinstance(break_point, float) and 0 < break_point < 1:
            # Fraction of sample; the index stays < n_periods because
            # break_point < 1.
            break_idx = int(n_periods * break_point)
            return time_periods[break_idx], break_idx

        if isinstance(break_point, (int, np.integer)):
            # Exact time period
            if break_point not in time_periods:
                raise ValueError(
                    f"Break point {break_point} not found in time periods. "
                    f"Available: {time_periods}"
                )
            return break_point, time_periods.index(break_point)

        raise ValueError(
            "break_point must be None, int (time period), "
            "or float between 0 and 1 (fraction)"
        )

    @staticmethod
    def _fit_pooled(formula, data, entity_col, time_col, label):
        """Fit PooledOLS on ``data`` and return ``(results, ssr)``.

        Any failure (including a failing import) is re-raised as ValueError
        naming ``label``, with the original exception chained as the cause.
        """
        try:
            # Imported at call time rather than at module import time —
            # presumably to avoid a circular import; verify before moving.
            from panelbox.models.static.pooled_ols import PooledOLS

            fitted = PooledOLS(formula, data, entity_col, time_col).fit()
            ssr = np.sum(fitted.resid ** 2)
        except Exception as e:
            raise ValueError(f"Failed to estimate {label}: {e}") from e
        return fitted, ssr

    def _get_data_full(self):
        """
        Get full data including DataFrame, formula, and variable names.

        Reads everything from ``self.results._model``: a copy of
        ``model.data.data``, the entity/time column names, ``model.formula``
        and the right-hand-side terms with intercept markers removed.

        Returns
        -------
        tuple
            (data, formula, entity_col, time_col, var_names) or
            (None, None, None, None, None) if not available
        """
        unavailable = (None, None, None, None, None)

        if not hasattr(self.results, '_model'):
            return unavailable

        model = self.results._model

        if not (hasattr(model, 'formula_parser') and hasattr(model, 'data')):
            return unavailable

        try:
            # Copy so callers can slice/modify without touching the model.
            data = model.data.data.copy()
            entity_col = model.data.entity_col
            time_col = model.data.time_col

            if not hasattr(model, 'formula'):
                return unavailable
            formula = model.formula

            if hasattr(model.formula_parser, 'rhs_terms'):
                var_names = [
                    term for term in model.formula_parser.rhs_terms
                    if term.lower() not in ['intercept', '1']
                ]
            else:
                # Fallback: split the right-hand side of the formula text
                # on '+'.
                rhs = formula.split('~')[1].strip()
                terms = [t.strip() for t in rhs.split('+')]
                var_names = [
                    t for t in terms
                    if t.lower() not in ['1', 'intercept', '']
                ]

            return data, formula, entity_col, time_col, var_names

        except Exception:
            # Deliberate best-effort: any problem reading the model is
            # reported as "not available"; run() turns that into ValueError.
            return unavailable
|
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Hausman specification test for panel data.
|
|
3
|
+
|
|
4
|
+
This module provides the Hausman test for choosing between Fixed Effects
|
|
5
|
+
and Random Effects models.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from typing import Optional
|
|
9
|
+
import numpy as np
|
|
10
|
+
import pandas as pd
|
|
11
|
+
from scipy import stats
|
|
12
|
+
|
|
13
|
+
from panelbox.core.results import PanelResults
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class HausmanTestResult:
|
|
17
|
+
"""
|
|
18
|
+
Container for Hausman test results.
|
|
19
|
+
|
|
20
|
+
Attributes
|
|
21
|
+
----------
|
|
22
|
+
statistic : float
|
|
23
|
+
Chi-squared test statistic
|
|
24
|
+
pvalue : float
|
|
25
|
+
P-value
|
|
26
|
+
df : int
|
|
27
|
+
Degrees of freedom
|
|
28
|
+
conclusion : str
|
|
29
|
+
Interpretation of test result
|
|
30
|
+
fe_params : pd.Series
|
|
31
|
+
Fixed effects coefficients
|
|
32
|
+
re_params : pd.Series
|
|
33
|
+
Random effects coefficients
|
|
34
|
+
diff : pd.Series
|
|
35
|
+
Difference in coefficients (FE - RE)
|
|
36
|
+
"""
|
|
37
|
+
|
|
38
|
+
def __init__(
|
|
39
|
+
self,
|
|
40
|
+
statistic: float,
|
|
41
|
+
pvalue: float,
|
|
42
|
+
df: int,
|
|
43
|
+
fe_params: pd.Series,
|
|
44
|
+
re_params: pd.Series,
|
|
45
|
+
diff: pd.Series,
|
|
46
|
+
alpha: float = 0.05
|
|
47
|
+
):
|
|
48
|
+
self.statistic = statistic
|
|
49
|
+
self.pvalue = pvalue
|
|
50
|
+
self.df = df
|
|
51
|
+
self.fe_params = fe_params
|
|
52
|
+
self.re_params = re_params
|
|
53
|
+
self.diff = diff
|
|
54
|
+
self.alpha = alpha
|
|
55
|
+
|
|
56
|
+
# Determine conclusion
|
|
57
|
+
if pvalue < alpha:
|
|
58
|
+
self.conclusion = (
|
|
59
|
+
f"Reject H0 at {alpha*100:.0f}% level. "
|
|
60
|
+
"Use Fixed Effects (RE is inconsistent)."
|
|
61
|
+
)
|
|
62
|
+
self.recommendation = "Fixed Effects"
|
|
63
|
+
else:
|
|
64
|
+
self.conclusion = (
|
|
65
|
+
f"Fail to reject H0 at {alpha*100:.0f}% level. "
|
|
66
|
+
"Random Effects is consistent and efficient."
|
|
67
|
+
)
|
|
68
|
+
self.recommendation = "Random Effects"
|
|
69
|
+
|
|
70
|
+
def __str__(self) -> str:
|
|
71
|
+
"""String representation."""
|
|
72
|
+
return self.summary()
|
|
73
|
+
|
|
74
|
+
def __repr__(self) -> str:
|
|
75
|
+
"""Repr."""
|
|
76
|
+
return f"HausmanTestResult(statistic={self.statistic:.3f}, pvalue={self.pvalue:.4f}, df={self.df})"
|
|
77
|
+
|
|
78
|
+
def summary(self) -> str:
|
|
79
|
+
"""
|
|
80
|
+
Generate formatted summary.
|
|
81
|
+
|
|
82
|
+
Returns
|
|
83
|
+
-------
|
|
84
|
+
str
|
|
85
|
+
Formatted test results
|
|
86
|
+
"""
|
|
87
|
+
lines = []
|
|
88
|
+
lines.append("=" * 70)
|
|
89
|
+
lines.append("HAUSMAN SPECIFICATION TEST")
|
|
90
|
+
lines.append("=" * 70)
|
|
91
|
+
lines.append("")
|
|
92
|
+
lines.append("H0: Random Effects is consistent (and efficient)")
|
|
93
|
+
lines.append("H1: Random Effects is inconsistent (use Fixed Effects)")
|
|
94
|
+
lines.append("")
|
|
95
|
+
lines.append("-" * 70)
|
|
96
|
+
lines.append(f"{'Test Statistic (Chi2)':<30} {self.statistic:>15.4f}")
|
|
97
|
+
lines.append(f"{'P-value':<30} {self.pvalue:>15.4f}")
|
|
98
|
+
lines.append(f"{'Degrees of Freedom':<30} {self.df:>15}")
|
|
99
|
+
lines.append("-" * 70)
|
|
100
|
+
lines.append("")
|
|
101
|
+
lines.append(f"Conclusion: {self.conclusion}")
|
|
102
|
+
lines.append(f"Recommendation: {self.recommendation}")
|
|
103
|
+
lines.append("")
|
|
104
|
+
|
|
105
|
+
# Coefficient comparison table
|
|
106
|
+
lines.append("=" * 70)
|
|
107
|
+
lines.append("COEFFICIENT COMPARISON")
|
|
108
|
+
lines.append("=" * 70)
|
|
109
|
+
lines.append(f"{'Variable':<15} {'Fixed Effects':<15} {'Random Effects':<15} {'Difference':<15}")
|
|
110
|
+
lines.append("-" * 70)
|
|
111
|
+
|
|
112
|
+
for var in self.fe_params.index:
|
|
113
|
+
fe_coef = self.fe_params[var]
|
|
114
|
+
re_coef = self.re_params[var]
|
|
115
|
+
diff_coef = self.diff[var]
|
|
116
|
+
|
|
117
|
+
lines.append(
|
|
118
|
+
f"{var:<15} {fe_coef:>14.4f} {re_coef:>14.4f} {diff_coef:>14.4f}"
|
|
119
|
+
)
|
|
120
|
+
|
|
121
|
+
lines.append("=" * 70)
|
|
122
|
+
lines.append("")
|
|
123
|
+
|
|
124
|
+
return "\n".join(lines)
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
class HausmanTest:
    """
    Hausman specification test for panel data.

    Tests the null hypothesis that the Random Effects estimator is consistent
    (and efficient) against the alternative that it is inconsistent.

    The test compares Fixed Effects (always consistent under standard assumptions)
    with Random Effects (consistent only if E[u_i | X_it] = 0).

    Parameters
    ----------
    fe_results : PanelResults
        Results from Fixed Effects estimation
    re_results : PanelResults
        Results from Random Effects estimation

    Raises
    ------
    ValueError
        If ``fe_results.model_type`` / ``re_results.model_type`` is not one
        of the accepted Fixed / Random Effects labels, or if the two models
        share no coefficient names.

    Examples
    --------
    >>> import panelbox as pb
    >>>
    >>> # Estimate both models
    >>> fe = pb.FixedEffects("y ~ x1 + x2", data, "firm", "year")
    >>> fe_results = fe.fit()
    >>>
    >>> re = pb.RandomEffects("y ~ x1 + x2", data, "firm", "year")
    >>> re_results = re.fit()
    >>>
    >>> # Run Hausman test
    >>> hausman = pb.HausmanTest(fe_results, re_results)
    >>> result = hausman.run()
    >>> print(result)
    >>>
    >>> # Use result
    >>> if result.recommendation == "Fixed Effects":
    ...     final_results = fe_results
    >>> else:
    ...     final_results = re_results
    """

    def __init__(
        self,
        fe_results: PanelResults,
        re_results: PanelResults
    ):
        if fe_results.model_type not in ['Fixed Effects', 'Fixed Effects (Two-Way)', 'Fixed Effects (Time)']:
            raise ValueError("First argument must be Fixed Effects results")

        if re_results.model_type not in ['Random Effects (GLS)', 'Random Effects']:
            raise ValueError("Second argument must be Random Effects results")

        self.fe_results = fe_results
        self.re_results = re_results

        # Only coefficients present in both models are compared. 'Intercept'
        # is removed from the RE names so it never enters the comparison.
        fe_vars = set(fe_results.params.index)
        re_vars = set(re_results.params.index) - {'Intercept'}

        # Sorted for a deterministic ordering of vectors and matrices.
        self.common_vars = sorted(fe_vars & re_vars)

        if not self.common_vars:
            raise ValueError("No common variables found between FE and RE models")

    def run(self, alpha: float = 0.05) -> HausmanTestResult:
        """
        Run the Hausman test.

        Parameters
        ----------
        alpha : float, default=0.05
            Significance level for test

        Returns
        -------
        HausmanTestResult
            Test results

        Warns
        -----
        RuntimeWarning
            If the statistic is negative, i.e. Var(b_FE) - Var(b_RE) is not
            positive definite in this sample. The statistic and p-value are
            still returned unchanged.

        Notes
        -----
        The Hausman test statistic is:

        H = (b_FE - b_RE)' [Var(b_FE) - Var(b_RE)]^{-1} (b_FE - b_RE)

        which follows a chi-squared distribution with K degrees of freedom,
        where K is the number of coefficients being tested.

        Examples
        --------
        >>> result = hausman.run(alpha=0.05)
        >>> print(f"Chi2 statistic: {result.statistic:.3f}")
        >>> print(f"P-value: {result.pvalue:.4f}")
        >>> print(f"Recommendation: {result.recommendation}")
        """
        # Function-scope import keeps the module's import block untouched.
        import warnings

        # Coefficients for the common variables, in common_vars order
        beta_fe = self.fe_results.params[self.common_vars].values
        beta_re = self.re_results.params[self.common_vars].values
        diff = beta_fe - beta_re

        # Matching sub-blocks of the two covariance matrices
        vcov_fe = self.fe_results.cov_params.loc[self.common_vars, self.common_vars].values
        vcov_re = self.re_results.cov_params.loc[self.common_vars, self.common_vars].values

        # Variance of difference: Var(b_FE - b_RE) = Var(b_FE) - Var(b_RE)
        # Under H0, RE is efficient, so this is the correct variance
        var_diff = vcov_fe - vcov_re

        # np.linalg.inv only raises for an exactly singular matrix; in that
        # case fall back to the pseudo-inverse. A non-singular but indefinite
        # matrix passes through here and is detected below via the sign of
        # the statistic.
        try:
            var_diff_inv = np.linalg.inv(var_diff)
        except np.linalg.LinAlgError:
            var_diff_inv = np.linalg.pinv(var_diff)

        # Hausman statistic: (b_FE - b_RE)' [Var(diff)]^{-1} (b_FE - b_RE)
        statistic = float(diff.T @ var_diff_inv @ diff)

        if statistic < 0:
            # A quadratic form in a positive-definite matrix cannot be
            # negative, so the variance difference is not positive definite.
            # Previously this was silent and simply produced p-value = 1.
            warnings.warn(
                "Hausman statistic is negative: Var(b_FE) - Var(b_RE) is "
                "not positive definite in this sample, so the chi-squared "
                "approximation is unreliable.",
                RuntimeWarning,
                stacklevel=2,
            )

        # Degrees of freedom: one per compared coefficient
        df = len(self.common_vars)

        # Survival function instead of 1 - cdf: avoids cancellation when the
        # p-value is very small.
        pvalue = float(stats.chi2.sf(statistic, df))

        return HausmanTestResult(
            statistic=statistic,
            pvalue=pvalue,
            df=df,
            fe_params=self.fe_results.params[self.common_vars],
            re_params=self.re_results.params[self.common_vars],
            diff=pd.Series(diff, index=self.common_vars),
            alpha=alpha
        )
|