panelbox 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- panelbox/__init__.py +67 -0
- panelbox/__version__.py +14 -0
- panelbox/cli/__init__.py +0 -0
- panelbox/cli/{commands}/__init__.py +0 -0
- panelbox/core/__init__.py +0 -0
- panelbox/core/base_model.py +164 -0
- panelbox/core/formula_parser.py +318 -0
- panelbox/core/panel_data.py +387 -0
- panelbox/core/results.py +366 -0
- panelbox/datasets/__init__.py +0 -0
- panelbox/datasets/{data}/__init__.py +0 -0
- panelbox/gmm/__init__.py +65 -0
- panelbox/gmm/difference_gmm.py +645 -0
- panelbox/gmm/estimator.py +562 -0
- panelbox/gmm/instruments.py +580 -0
- panelbox/gmm/results.py +550 -0
- panelbox/gmm/system_gmm.py +621 -0
- panelbox/gmm/tests.py +535 -0
- panelbox/models/__init__.py +11 -0
- panelbox/models/dynamic/__init__.py +0 -0
- panelbox/models/iv/__init__.py +0 -0
- panelbox/models/static/__init__.py +13 -0
- panelbox/models/static/fixed_effects.py +516 -0
- panelbox/models/static/pooled_ols.py +298 -0
- panelbox/models/static/random_effects.py +512 -0
- panelbox/report/__init__.py +61 -0
- panelbox/report/asset_manager.py +410 -0
- panelbox/report/css_manager.py +472 -0
- panelbox/report/exporters/__init__.py +15 -0
- panelbox/report/exporters/html_exporter.py +440 -0
- panelbox/report/exporters/latex_exporter.py +510 -0
- panelbox/report/exporters/markdown_exporter.py +446 -0
- panelbox/report/renderers/__init__.py +11 -0
- panelbox/report/renderers/static/__init__.py +0 -0
- panelbox/report/renderers/static_validation_renderer.py +341 -0
- panelbox/report/report_manager.py +502 -0
- panelbox/report/template_manager.py +337 -0
- panelbox/report/transformers/__init__.py +0 -0
- panelbox/report/transformers/static/__init__.py +0 -0
- panelbox/report/validation_transformer.py +449 -0
- panelbox/standard_errors/__init__.py +0 -0
- panelbox/templates/__init__.py +0 -0
- panelbox/templates/assets/css/base_styles.css +382 -0
- panelbox/templates/assets/css/report_components.css +747 -0
- panelbox/templates/assets/js/tab-navigation.js +161 -0
- panelbox/templates/assets/js/utils.js +276 -0
- panelbox/templates/common/footer.html +24 -0
- panelbox/templates/common/header.html +44 -0
- panelbox/templates/common/meta.html +5 -0
- panelbox/templates/validation/interactive/index.html +272 -0
- panelbox/templates/validation/interactive/partials/charts.html +58 -0
- panelbox/templates/validation/interactive/partials/methodology.html +201 -0
- panelbox/templates/validation/interactive/partials/overview.html +146 -0
- panelbox/templates/validation/interactive/partials/recommendations.html +101 -0
- panelbox/templates/validation/interactive/partials/test_results.html +231 -0
- panelbox/utils/__init__.py +0 -0
- panelbox/utils/formatting.py +172 -0
- panelbox/utils/matrix_ops.py +233 -0
- panelbox/utils/statistical.py +173 -0
- panelbox/validation/__init__.py +58 -0
- panelbox/validation/base.py +175 -0
- panelbox/validation/cointegration/__init__.py +0 -0
- panelbox/validation/cross_sectional_dependence/__init__.py +13 -0
- panelbox/validation/cross_sectional_dependence/breusch_pagan_lm.py +222 -0
- panelbox/validation/cross_sectional_dependence/frees.py +297 -0
- panelbox/validation/cross_sectional_dependence/pesaran_cd.py +188 -0
- panelbox/validation/heteroskedasticity/__init__.py +13 -0
- panelbox/validation/heteroskedasticity/breusch_pagan.py +222 -0
- panelbox/validation/heteroskedasticity/modified_wald.py +172 -0
- panelbox/validation/heteroskedasticity/white.py +208 -0
- panelbox/validation/instruments/__init__.py +0 -0
- panelbox/validation/robustness/__init__.py +0 -0
- panelbox/validation/serial_correlation/__init__.py +13 -0
- panelbox/validation/serial_correlation/baltagi_wu.py +220 -0
- panelbox/validation/serial_correlation/breusch_godfrey.py +260 -0
- panelbox/validation/serial_correlation/wooldridge_ar.py +200 -0
- panelbox/validation/specification/__init__.py +16 -0
- panelbox/validation/specification/chow.py +273 -0
- panelbox/validation/specification/hausman.py +264 -0
- panelbox/validation/specification/mundlak.py +331 -0
- panelbox/validation/specification/reset.py +273 -0
- panelbox/validation/unit_root/__init__.py +0 -0
- panelbox/validation/validation_report.py +257 -0
- panelbox/validation/validation_suite.py +401 -0
- panelbox-0.2.0.dist-info/METADATA +337 -0
- panelbox-0.2.0.dist-info/RECORD +90 -0
- panelbox-0.2.0.dist-info/WHEEL +5 -0
- panelbox-0.2.0.dist-info/entry_points.txt +2 -0
- panelbox-0.2.0.dist-info/licenses/LICENSE +21 -0
- panelbox-0.2.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Breusch-Pagan LM test for cross-sectional dependence in panel data.
|
|
3
|
+
|
|
4
|
+
References
|
|
5
|
+
----------
|
|
6
|
+
Breusch, T. S., & Pagan, A. R. (1980). The Lagrange Multiplier Test and its
|
|
7
|
+
Applications to Model Specification in Econometrics. The Review of Economic
|
|
8
|
+
Studies, 47(1), 239-253.
|
|
9
|
+
|
|
10
|
+
Pesaran, M. H. (2004). General Diagnostic Tests for Cross Section Dependence
|
|
11
|
+
in Panels. Cambridge Working Papers in Economics No. 0435.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import numpy as np
|
|
15
|
+
import pandas as pd
|
|
16
|
+
from scipy import stats
|
|
17
|
+
|
|
18
|
+
from panelbox.validation.base import ValidationTest, ValidationTestResult
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class BreuschPaganLMTest(ValidationTest):
    """
    Breusch-Pagan LM test for cross-sectional dependence.

    Tests the null hypothesis that residuals are cross-sectionally
    independent (no contemporaneous correlation across entities).

    H0: Corr(e_it, e_jt) = 0 for all i != j
    H1: Some Corr(e_it, e_jt) != 0

    The test is based on the sum of squared pairwise correlation
    coefficients of residuals.

    Notes
    -----
    The test statistic is:

        LM = sum_{i<j} T_ij * rho_ij**2

    where rho_ij is the sample correlation between the residuals of
    entities i and j, computed over their T_ij common time periods.
    For a balanced panel T_ij == T for every pair, so this reduces to
    the textbook form LM = T * sum_{i<j} rho_ij**2.

    Under H0, LM ~ Chi2(number of valid pairs); with complete data the
    number of pairs is N(N-1)/2.

    This test is appropriate for panels with:
    - Fixed T (time periods)
    - N not too large (becomes over-sized as N grows)
    - For large N, use the Pesaran CD test instead

    The test uses pairwise complete observations for each entity pair,
    so it also works on unbalanced panels.

    Examples
    --------
    >>> from panelbox.models.static.pooled_ols import PooledOLS
    >>> model = PooledOLS("y ~ x1 + x2", data, "entity", "time")
    >>> results = model.fit()
    >>>
    >>> from panelbox.validation.cross_sectional_dependence.breusch_pagan_lm import BreuschPaganLMTest
    >>> test = BreuschPaganLMTest(results)
    >>> result = test.run()
    >>> print(result)
    """

    def __init__(self, results: 'PanelResults'):
        """
        Initialize Breusch-Pagan LM test.

        Parameters
        ----------
        results : PanelResults
            Results from panel model estimation
        """
        super().__init__(results)

    def run(self, alpha: float = 0.05) -> ValidationTestResult:
        """
        Run Breusch-Pagan LM test for cross-sectional dependence.

        Parameters
        ----------
        alpha : float, default=0.05
            Significance level

        Returns
        -------
        ValidationTestResult
            Test results

        Raises
        ------
        ValueError
            If fewer than 2 entities are present or no valid pairwise
            correlation can be computed.

        Warnings
        --------
        This test can be over-sized (reject H0 too often) when N is large.
        For large N (> 30), consider using the Pesaran CD test instead.

        Notes
        -----
        The test requires computing N(N-1)/2 pairwise correlations.
        For large N, this can be computationally intensive.
        """
        # Get residuals with entity and time structure
        resid_df = self._prepare_residual_data()

        # Wide format: rows = time periods, columns = entities
        resid_wide = resid_df.pivot(index='time', columns='entity', values='resid')

        T = resid_wide.shape[0]  # Number of time periods
        N = resid_wide.shape[1]  # Number of entities

        if N < 2:
            raise ValueError(
                "Need at least 2 entities for cross-sectional dependence test"
            )

        # Pairwise correlations over pairwise-complete observations
        correlations = []
        pair_lengths = []  # T_ij: common sample size for each pair

        entity_list = list(resid_wide.columns)

        for i in range(N):
            for j in range(i + 1, N):
                resid_i = resid_wide[entity_list[i]].dropna()
                resid_j = resid_wide[entity_list[j]].dropna()

                # Time periods observed for both entities
                common_times = resid_i.index.intersection(resid_j.index)

                if len(common_times) >= 3:  # Need at least 3 obs for correlation
                    rho_ij = np.corrcoef(
                        resid_i.loc[common_times],
                        resid_j.loc[common_times],
                    )[0, 1]

                    # corrcoef yields NaN when a series is constant over the window
                    if not np.isnan(rho_ij):
                        correlations.append(rho_ij)
                        pair_lengths.append(len(common_times))

        n_pairs = len(correlations)
        if n_pairs == 0:
            raise ValueError(
                "No valid pairwise correlations could be computed. "
                "Check for constant residuals or insufficient data."
            )

        correlations = np.array(correlations)
        pair_lengths = np.array(pair_lengths, dtype=float)

        # LM = sum_{i<j} T_ij * rho_ij^2.
        # Weighting each squared correlation by its own pairwise sample
        # size keeps the statistic correct on unbalanced panels; for a
        # balanced panel T_ij == T and this equals T * sum(rho_ij^2).
        lm_stat = float(np.sum(pair_lengths * correlations ** 2))

        # Degrees of freedom = number of valid pairs
        # (N(N-1)/2 for complete data)
        df = n_pairs

        # Survival function avoids the catastrophic precision loss of
        # 1 - cdf(...) when the p-value is very small.
        pvalue = stats.chi2.sf(lm_stat, df)

        # Summary statistics of the pairwise correlations
        mean_abs_corr = np.mean(np.abs(correlations))
        max_abs_corr = np.max(np.abs(correlations))
        positive_corrs = np.sum(correlations > 0)
        negative_corrs = np.sum(correlations < 0)

        metadata = {
            'n_entities': int(N),
            'n_time_periods': int(T),
            'n_pairs': int(n_pairs),
            'n_pairs_expected': int(N * (N - 1) // 2),
            'mean_abs_correlation': float(mean_abs_corr),
            'max_abs_correlation': float(max_abs_corr),
            'n_positive_correlations': int(positive_corrs),
            'n_negative_correlations': int(negative_corrs),
            'warning': (
                'Test may be over-sized for large N. '
                'Consider Pesaran CD test if N > 30.'
                if N > 30 else None
            )
        }

        result = ValidationTestResult(
            test_name="Breusch-Pagan LM Test for Cross-Sectional Dependence",
            statistic=lm_stat,
            pvalue=pvalue,
            null_hypothesis="No cross-sectional dependence (residuals independent across entities)",
            alternative_hypothesis="Cross-sectional dependence present",
            alpha=alpha,
            df=df,
            metadata=metadata
        )

        return result

    def _prepare_residual_data(self) -> pd.DataFrame:
        """
        Prepare residual data with entity and time identifiers.

        Returns
        -------
        pd.DataFrame
            DataFrame with columns: entity, time, resid

        Raises
        ------
        AttributeError
            If the results object does not expose entity/time indices.
        """
        if hasattr(self.results, 'entity_index') and hasattr(self.results, 'time_index'):
            # Ensure residuals are 1-D before building the frame
            resid_flat = self.resid.ravel() if hasattr(self.resid, 'ravel') else self.resid

            resid_df = pd.DataFrame({
                'entity': self.results.entity_index,
                'time': self.results.time_index,
                'resid': resid_flat
            })

            return resid_df
        else:
            raise AttributeError(
                "Results object must have 'entity_index' and 'time_index' attributes"
            )
|
|
@@ -0,0 +1,297 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Frees test for cross-sectional dependence in panel data.
|
|
3
|
+
|
|
4
|
+
References
|
|
5
|
+
----------
|
|
6
|
+
Frees, E. W. (1995). Assessing Cross-Sectional Correlation in Panel Data.
|
|
7
|
+
Journal of Econometrics, 69(2), 393-414.
|
|
8
|
+
|
|
9
|
+
Frees, E. W. (2004). Longitudinal and Panel Data: Analysis and Applications
|
|
10
|
+
in the Social Sciences. Cambridge University Press.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import numpy as np
|
|
14
|
+
import pandas as pd
|
|
15
|
+
from scipy import stats
|
|
16
|
+
|
|
17
|
+
from panelbox.validation.base import ValidationTest, ValidationTestResult
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class FreesTest(ValidationTest):
    """
    Frees test for cross-sectional dependence in panel data.

    This is a non-parametric test that uses the Friedman-type rank
    statistic to test for cross-sectional dependence.

    H0: No cross-sectional dependence (residuals independent across entities)
    H1: Cross-sectional dependence present

    The test is robust to non-normality and heteroskedasticity.

    Notes
    -----
    The test statistic is based on the average Spearman rank correlation:

        Q_F = sum_{i<j} R_ij**2 / [N(N-1)/2]

    where R_ij is the Spearman rank correlation between residuals
    of entities i and j.

    Under H0, Q_F follows an approximate distribution that can be
    compared to critical values tabulated by Frees (1995, 2004).
    For large samples we use the asymptotic normal approximation of
    the standardized Q_F.

    Advantages over Breusch-Pagan LM test:
    - Non-parametric (doesn't require normality)
    - More robust to outliers
    - Better size properties for moderate N

    Examples
    --------
    >>> from panelbox.models.static.pooled_ols import PooledOLS
    >>> model = PooledOLS("y ~ x1 + x2", data, "entity", "time")
    >>> results = model.fit()
    >>>
    >>> from panelbox.validation.cross_sectional_dependence.frees import FreesTest
    >>> test = FreesTest(results)
    >>> result = test.run()
    >>> print(result)
    """

    def __init__(self, results: 'PanelResults'):
        """
        Initialize Frees test.

        Parameters
        ----------
        results : PanelResults
            Results from panel model estimation
        """
        super().__init__(results)

    def run(self, alpha: float = 0.05) -> ValidationTestResult:
        """
        Run Frees test for cross-sectional dependence.

        Parameters
        ----------
        alpha : float, default=0.05
            Significance level

        Returns
        -------
        ValidationTestResult
            Test results

        Raises
        ------
        ValueError
            If N < 2, T < 3, or no valid rank correlation can be computed.

        Notes
        -----
        The test uses the asymptotic approximation for p-values.
        Critical values from Frees (1995) are also provided in metadata
        for reference.
        """
        # Get residuals with entity and time structure
        resid_df = self._prepare_residual_data()

        # Wide format: rows = time periods, columns = entities
        resid_wide = resid_df.pivot(index='time', columns='entity', values='resid')

        T = resid_wide.shape[0]  # Number of time periods
        N = resid_wide.shape[1]  # Number of entities

        if N < 2:
            raise ValueError(
                "Need at least 2 entities for cross-sectional dependence test"
            )

        if T < 3:
            raise ValueError(
                "Need at least 3 time periods for Frees test"
            )

        # Pairwise Spearman rank correlations over pairwise-complete data
        rank_correlations = []
        n_pairs = 0

        entity_list = list(resid_wide.columns)

        for i in range(N):
            for j in range(i + 1, N):
                resid_i = resid_wide[entity_list[i]].dropna()
                resid_j = resid_wide[entity_list[j]].dropna()

                # Time periods observed for both entities
                common_times = resid_i.index.intersection(resid_j.index)

                if len(common_times) >= 3:  # Need at least 3 obs for rank correlation
                    # Spearman rank correlation: robust to outliers and
                    # non-normality
                    rho_rank, _ = stats.spearmanr(
                        resid_i.loc[common_times].values,
                        resid_j.loc[common_times].values,
                    )

                    # spearmanr yields NaN when a series is constant
                    if not np.isnan(rho_rank):
                        rank_correlations.append(rho_rank)
                        n_pairs += 1

        if n_pairs == 0:
            raise ValueError(
                "No valid pairwise rank correlations could be computed. "
                "Check for constant residuals or insufficient data."
            )

        # Frees statistic: mean of squared rank correlations
        rank_correlations = np.array(rank_correlations)
        q_frees = np.mean(rank_correlations ** 2)

        # Asymptotic moments under H0:
        #   E[Q_F] = 1/(T-1)
        #   Var[Q_F] ~= 2(T-3) / [(T+1)(T-1)^2]
        expected_qf = 1 / (T - 1)
        var_qf = 2 * (T - 3) / ((T + 1) * (T - 1) ** 2)
        se_qf = np.sqrt(var_qf / n_pairs)  # SE of the mean over pairs

        # Standardized Q_F (guard against zero variance when T == 3)
        if se_qf > 0:
            z_stat = (q_frees - expected_qf) / se_qf
        else:
            z_stat = 0.0

        # Two-sided p-value; sf avoids precision loss of 1 - cdf for
        # very small p-values.
        pvalue = 2 * stats.norm.sf(abs(z_stat))

        # Critical values from Frees (1995) Table 1 (balanced panels)
        critical_values = self._get_critical_values(T, N)

        # Simple decision against the tabulated alpha=0.05 value
        interpretation = (
            "Reject H0 (cross-sectional dependence detected)"
            if q_frees > critical_values.get('alpha_0.05', float('inf'))
            else "Do not reject H0 (no evidence of cross-sectional dependence)"
        )

        mean_abs_rank_corr = np.mean(np.abs(rank_correlations))
        max_abs_rank_corr = np.max(np.abs(rank_correlations))

        metadata = {
            'q_frees_statistic': float(q_frees),
            'z_statistic': float(z_stat),
            'expected_qf_under_h0': float(expected_qf),
            'n_entities': int(N),
            'n_time_periods': int(T),
            'n_pairs': int(n_pairs),
            'mean_abs_rank_correlation': float(mean_abs_rank_corr),
            'max_abs_rank_correlation': float(max_abs_rank_corr),
            'critical_values': critical_values,
            'interpretation': interpretation,
            'note': (
                'Frees test is non-parametric and robust to non-normality. '
                'Critical values are approximate for unbalanced panels.'
            )
        }

        result = ValidationTestResult(
            test_name="Frees Test for Cross-Sectional Dependence",
            statistic=z_stat,
            pvalue=pvalue,
            null_hypothesis="No cross-sectional dependence (residuals independent across entities)",
            alternative_hypothesis="Cross-sectional dependence present",
            alpha=alpha,
            df=None,  # Non-parametric test
            metadata=metadata
        )

        return result

    def _get_critical_values(self, T, N):
        """
        Get critical values from Frees (1995) Table 1.

        Parameters
        ----------
        T : int
            Number of time periods
        N : int
            Number of entities

        Returns
        -------
        dict
            Critical values at different significance levels

        Notes
        -----
        These are approximate critical values for balanced panels.
        For T beyond the tabulated range, the asymptotic normal
        approximation cv = z_alpha * sd(Q_F) + E[Q_F] is used.
        """
        # Approximate Q_F critical values from Frees (1995, 2004) at
        # alpha = 0.10, 0.05, 0.01, keyed by T-range.
        if T <= 5:
            cv_0_10 = 0.3000  # Very approximate
            cv_0_05 = 0.4000
            cv_0_01 = 0.6000
        elif T <= 10:
            cv_0_10 = 0.1429
            cv_0_05 = 0.2000
            cv_0_01 = 0.3000
        elif T <= 20:
            cv_0_10 = 0.0800
            cv_0_05 = 0.1100
            cv_0_01 = 0.1700
        elif T <= 30:
            cv_0_10 = 0.0543
            cv_0_05 = 0.0754
            cv_0_01 = 0.1170
        else:
            # Asymptotic approximation: z-quantile scaled by sd(Q_F),
            # shifted by E[Q_F] = 1/(T-1)
            sd_qf = np.sqrt(2 * (T - 3) / ((T + 1) * (T - 1) ** 2))
            cv_0_10 = 1.28 * sd_qf + 1 / (T - 1)
            cv_0_05 = 1.96 * sd_qf + 1 / (T - 1)
            cv_0_01 = 2.58 * sd_qf + 1 / (T - 1)

        return {
            'alpha_0.10': cv_0_10,
            'alpha_0.05': cv_0_05,
            'alpha_0.01': cv_0_01,
            'T': T,
            'N': N,
            'note': 'Approximate critical values from Frees (1995)'
        }

    def _prepare_residual_data(self) -> pd.DataFrame:
        """
        Prepare residual data with entity and time identifiers.

        Returns
        -------
        pd.DataFrame
            DataFrame with columns: entity, time, resid

        Raises
        ------
        AttributeError
            If the results object does not expose entity/time indices.
        """
        if hasattr(self.results, 'entity_index') and hasattr(self.results, 'time_index'):
            # Ensure residuals are 1-D before building the frame
            resid_flat = self.resid.ravel() if hasattr(self.resid, 'ravel') else self.resid

            resid_df = pd.DataFrame({
                'entity': self.results.entity_index,
                'time': self.results.time_index,
                'resid': resid_flat
            })

            return resid_df
        else:
            raise AttributeError(
                "Results object must have 'entity_index' and 'time_index' attributes"
            )
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pesaran CD test for cross-sectional dependence in panel data.
|
|
3
|
+
|
|
4
|
+
References
|
|
5
|
+
----------
|
|
6
|
+
Pesaran, M. H. (2004). General diagnostic tests for cross section dependence
|
|
7
|
+
in panels. University of Cambridge, Faculty of Economics, Cambridge Working
|
|
8
|
+
Papers in Economics No. 0435.
|
|
9
|
+
|
|
10
|
+
Stata command: xtcd
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import numpy as np
|
|
14
|
+
import pandas as pd
|
|
15
|
+
from scipy import stats
|
|
16
|
+
from itertools import combinations
|
|
17
|
+
|
|
18
|
+
from panelbox.validation.base import ValidationTest, ValidationTestResult
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class PesaranCDTest(ValidationTest):
    """
    Pesaran CD test for cross-sectional dependence.

    Tests the null hypothesis of cross-sectional independence against the
    alternative of cross-sectional dependence.

    The test is based on the average of pairwise correlation coefficients
    of the residuals.

    H0: No cross-sectional dependence (residuals are independent across entities)
    H1: Cross-sectional dependence present

    Notes
    -----
    The test statistic is

        CD = sqrt(2 / (N(N-1))) * sum_{i<j} sqrt(T_ij) * rho_ij

    where T_ij is the number of common time periods for pair (i, j).
    For a balanced panel this reduces to the familiar
    CD = sqrt(2T / (N(N-1))) * sum_{i<j} rho_ij.

    The statistic is asymptotically distributed as N(0,1) under the null
    hypothesis. The test works well for both N large, T small and N large,
    T large panels, and requires T > 3.

    Examples
    --------
    >>> from panelbox.models.static.fixed_effects import FixedEffects
    >>> fe = FixedEffects("y ~ x1 + x2", data, "entity", "time")
    >>> results = fe.fit()
    >>>
    >>> from panelbox.validation.cross_sectional_dependence.pesaran_cd import PesaranCDTest
    >>> test = PesaranCDTest(results)
    >>> result = test.run()
    >>> print(result)
    """

    def __init__(self, results: 'PanelResults'):
        """
        Initialize Pesaran CD test.

        Parameters
        ----------
        results : PanelResults
            Results from panel model estimation
        """
        super().__init__(results)

    def run(self, alpha: float = 0.05) -> ValidationTestResult:
        """
        Run Pesaran CD test for cross-sectional dependence.

        Parameters
        ----------
        alpha : float, default=0.05
            Significance level

        Returns
        -------
        ValidationTestResult
            Test results

        Raises
        ------
        ValueError
            If required data indices are not available or T < 3
        """
        # Get residuals with entity and time information
        resid_df = self._prepare_residual_data()

        # Wide format: rows = time periods, columns = entities
        resid_wide = resid_df.pivot(index='time', columns='entity', values='resid')

        # Check minimum time periods
        T = len(resid_wide)
        if T < 3:
            raise ValueError(
                f"Pesaran CD test requires at least 3 time periods. Found: {T}"
            )

        N = len(resid_wide.columns)

        # Pairwise correlations over pairwise-complete observations
        correlations = []
        T_ij_list = []  # Effective sample size for each pair

        for i, j in combinations(range(N), 2):
            e_i = resid_wide.iloc[:, i]
            e_j = resid_wide.iloc[:, j]

            # Keep only time periods observed for both entities
            valid = ~(e_i.isna() | e_j.isna())
            e_i_valid = e_i[valid]
            e_j_valid = e_j[valid]

            T_ij = len(e_i_valid)

            if T_ij >= 3:  # Need at least 3 observations for a correlation
                rho_ij = np.corrcoef(e_i_valid, e_j_valid)[0, 1]
                correlations.append(rho_ij)
                T_ij_list.append(T_ij)

        if len(correlations) == 0:
            raise ValueError("No valid pairwise correlations could be computed")

        correlations = np.array(correlations)
        T_ij_arr = np.array(T_ij_list, dtype=float)

        # Pesaran CD statistic (unbalanced form):
        #   CD = sqrt(2 / (N(N-1))) * sum(sqrt(T_ij) * rho_ij)
        # For a balanced panel T_ij == T, giving the textbook
        # sqrt(2T/(N(N-1))) * sum(rho_ij).
        cd_stat = np.sqrt(2 / (N * (N - 1))) * np.sum(np.sqrt(T_ij_arr) * correlations)

        # Under H0, CD ~ N(0,1); sf avoids the precision loss of
        # 1 - cdf(...) for very small p-values.
        pvalue = 2 * stats.norm.sf(np.abs(cd_stat))

        # Average absolute correlation (useful descriptive diagnostic)
        avg_abs_corr = np.mean(np.abs(correlations))

        metadata = {
            'n_entities': N,
            'n_time_periods': T,
            'n_pairs': len(correlations),
            'avg_correlation': np.mean(correlations),
            'avg_abs_correlation': avg_abs_corr,
            'max_abs_correlation': np.max(np.abs(correlations)),
            'min_correlation': np.min(correlations),
            'max_correlation': np.max(correlations)
        }

        result = ValidationTestResult(
            test_name="Pesaran CD Test for Cross-Sectional Dependence",
            statistic=cd_stat,
            pvalue=pvalue,
            null_hypothesis="No cross-sectional dependence",
            alternative_hypothesis="Cross-sectional dependence present",
            alpha=alpha,
            df=None,
            metadata=metadata
        )

        return result

    def _prepare_residual_data(self) -> pd.DataFrame:
        """
        Prepare residual data with entity and time identifiers.

        Returns
        -------
        pd.DataFrame
            DataFrame with columns: entity, time, resid

        Raises
        ------
        AttributeError
            If the results object does not expose entity/time indices.
        """
        if hasattr(self.results, 'entity_index') and hasattr(self.results, 'time_index'):
            # Ensure resid is 1D
            resid_flat = self.resid.ravel() if hasattr(self.resid, 'ravel') else self.resid

            resid_df = pd.DataFrame({
                'entity': self.results.entity_index,
                'time': self.results.time_index,
                'resid': resid_flat
            })
        else:
            raise AttributeError(
                "Results object must have 'entity_index' and 'time_index' attributes. "
                "Please ensure your model stores these during estimation."
            )

        return resid_df
|