panelbox-0.2.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. panelbox/__init__.py +67 -0
  2. panelbox/__version__.py +14 -0
  3. panelbox/cli/__init__.py +0 -0
  4. panelbox/cli/commands/__init__.py +0 -0
  5. panelbox/core/__init__.py +0 -0
  6. panelbox/core/base_model.py +164 -0
  7. panelbox/core/formula_parser.py +318 -0
  8. panelbox/core/panel_data.py +387 -0
  9. panelbox/core/results.py +366 -0
  10. panelbox/datasets/__init__.py +0 -0
  11. panelbox/datasets/data/__init__.py +0 -0
  12. panelbox/gmm/__init__.py +65 -0
  13. panelbox/gmm/difference_gmm.py +645 -0
  14. panelbox/gmm/estimator.py +562 -0
  15. panelbox/gmm/instruments.py +580 -0
  16. panelbox/gmm/results.py +550 -0
  17. panelbox/gmm/system_gmm.py +621 -0
  18. panelbox/gmm/tests.py +535 -0
  19. panelbox/models/__init__.py +11 -0
  20. panelbox/models/dynamic/__init__.py +0 -0
  21. panelbox/models/iv/__init__.py +0 -0
  22. panelbox/models/static/__init__.py +13 -0
  23. panelbox/models/static/fixed_effects.py +516 -0
  24. panelbox/models/static/pooled_ols.py +298 -0
  25. panelbox/models/static/random_effects.py +512 -0
  26. panelbox/report/__init__.py +61 -0
  27. panelbox/report/asset_manager.py +410 -0
  28. panelbox/report/css_manager.py +472 -0
  29. panelbox/report/exporters/__init__.py +15 -0
  30. panelbox/report/exporters/html_exporter.py +440 -0
  31. panelbox/report/exporters/latex_exporter.py +510 -0
  32. panelbox/report/exporters/markdown_exporter.py +446 -0
  33. panelbox/report/renderers/__init__.py +11 -0
  34. panelbox/report/renderers/static/__init__.py +0 -0
  35. panelbox/report/renderers/static_validation_renderer.py +341 -0
  36. panelbox/report/report_manager.py +502 -0
  37. panelbox/report/template_manager.py +337 -0
  38. panelbox/report/transformers/__init__.py +0 -0
  39. panelbox/report/transformers/static/__init__.py +0 -0
  40. panelbox/report/validation_transformer.py +449 -0
  41. panelbox/standard_errors/__init__.py +0 -0
  42. panelbox/templates/__init__.py +0 -0
  43. panelbox/templates/assets/css/base_styles.css +382 -0
  44. panelbox/templates/assets/css/report_components.css +747 -0
  45. panelbox/templates/assets/js/tab-navigation.js +161 -0
  46. panelbox/templates/assets/js/utils.js +276 -0
  47. panelbox/templates/common/footer.html +24 -0
  48. panelbox/templates/common/header.html +44 -0
  49. panelbox/templates/common/meta.html +5 -0
  50. panelbox/templates/validation/interactive/index.html +272 -0
  51. panelbox/templates/validation/interactive/partials/charts.html +58 -0
  52. panelbox/templates/validation/interactive/partials/methodology.html +201 -0
  53. panelbox/templates/validation/interactive/partials/overview.html +146 -0
  54. panelbox/templates/validation/interactive/partials/recommendations.html +101 -0
  55. panelbox/templates/validation/interactive/partials/test_results.html +231 -0
  56. panelbox/utils/__init__.py +0 -0
  57. panelbox/utils/formatting.py +172 -0
  58. panelbox/utils/matrix_ops.py +233 -0
  59. panelbox/utils/statistical.py +173 -0
  60. panelbox/validation/__init__.py +58 -0
  61. panelbox/validation/base.py +175 -0
  62. panelbox/validation/cointegration/__init__.py +0 -0
  63. panelbox/validation/cross_sectional_dependence/__init__.py +13 -0
  64. panelbox/validation/cross_sectional_dependence/breusch_pagan_lm.py +222 -0
  65. panelbox/validation/cross_sectional_dependence/frees.py +297 -0
  66. panelbox/validation/cross_sectional_dependence/pesaran_cd.py +188 -0
  67. panelbox/validation/heteroskedasticity/__init__.py +13 -0
  68. panelbox/validation/heteroskedasticity/breusch_pagan.py +222 -0
  69. panelbox/validation/heteroskedasticity/modified_wald.py +172 -0
  70. panelbox/validation/heteroskedasticity/white.py +208 -0
  71. panelbox/validation/instruments/__init__.py +0 -0
  72. panelbox/validation/robustness/__init__.py +0 -0
  73. panelbox/validation/serial_correlation/__init__.py +13 -0
  74. panelbox/validation/serial_correlation/baltagi_wu.py +220 -0
  75. panelbox/validation/serial_correlation/breusch_godfrey.py +260 -0
  76. panelbox/validation/serial_correlation/wooldridge_ar.py +200 -0
  77. panelbox/validation/specification/__init__.py +16 -0
  78. panelbox/validation/specification/chow.py +273 -0
  79. panelbox/validation/specification/hausman.py +264 -0
  80. panelbox/validation/specification/mundlak.py +331 -0
  81. panelbox/validation/specification/reset.py +273 -0
  82. panelbox/validation/unit_root/__init__.py +0 -0
  83. panelbox/validation/validation_report.py +257 -0
  84. panelbox/validation/validation_suite.py +401 -0
  85. panelbox-0.2.0.dist-info/METADATA +337 -0
  86. panelbox-0.2.0.dist-info/RECORD +90 -0
  87. panelbox-0.2.0.dist-info/WHEEL +5 -0
  88. panelbox-0.2.0.dist-info/entry_points.txt +2 -0
  89. panelbox-0.2.0.dist-info/licenses/LICENSE +21 -0
  90. panelbox-0.2.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,222 @@
+"""
+Breusch-Pagan LM test for cross-sectional dependence in panel data.
+
+References
+----------
+Breusch, T. S., & Pagan, A. R. (1980). The Lagrange Multiplier Test and its
+Applications to Model Specification in Econometrics. The Review of Economic
+Studies, 47(1), 239-253.
+
+Pesaran, M. H. (2004). General Diagnostic Tests for Cross Section Dependence
+in Panels. Cambridge Working Papers in Economics No. 0435.
+"""
+
+import numpy as np
+import pandas as pd
+from scipy import stats
+
+from panelbox.validation.base import ValidationTest, ValidationTestResult
+
+
+class BreuschPaganLMTest(ValidationTest):
+    """
+    Breusch-Pagan LM test for cross-sectional dependence.
+
+    Tests the null hypothesis that residuals are cross-sectionally
+    independent (no contemporaneous correlation across entities).
+
+    H0: Corr(e_it, e_jt) = 0 for all i ≠ j
+    H1: Some Corr(e_it, e_jt) ≠ 0
+
+    The test is based on the sum of squared pairwise correlation
+    coefficients of residuals.
+
+    Notes
+    -----
+    The test statistic is:
+
+        LM = T * sum_{i<j} rho_ij²
+
+    where rho_ij is the sample correlation between residuals of
+    entity i and entity j, and the sum is over all N(N-1)/2 pairs.
+
+    Under H0, LM ~ Chi2(N(N-1)/2)
+
+    This test is appropriate for panels with:
+    - Fixed T (time periods)
+    - N not too large (becomes over-sized as N → ∞)
+    - For large N, use Pesaran CD test instead
+
+    The test requires a balanced panel or will use pairwise complete
+    observations for each entity pair.
+
+    Examples
+    --------
+    >>> from panelbox.models.static.pooled_ols import PooledOLS
+    >>> model = PooledOLS("y ~ x1 + x2", data, "entity", "time")
+    >>> results = model.fit()
+    >>>
+    >>> from panelbox.validation.cross_sectional_dependence.breusch_pagan_lm import BreuschPaganLMTest
+    >>> test = BreuschPaganLMTest(results)
+    >>> result = test.run()
+    >>> print(result)
+    """
+
+    def __init__(self, results: 'PanelResults'):
+        """
+        Initialize Breusch-Pagan LM test.
+
+        Parameters
+        ----------
+        results : PanelResults
+            Results from panel model estimation
+        """
+        super().__init__(results)
+
+    def run(self, alpha: float = 0.05) -> ValidationTestResult:
+        """
+        Run Breusch-Pagan LM test for cross-sectional dependence.
+
+        Parameters
+        ----------
+        alpha : float, default=0.05
+            Significance level
+
+        Returns
+        -------
+        ValidationTestResult
+            Test results
+
+        Warnings
+        --------
+        This test can be over-sized (reject H0 too often) when N is large.
+        For large N (> 30), consider using the Pesaran CD test instead.
+
+        Notes
+        -----
+        The test requires computing N(N-1)/2 pairwise correlations.
+        For large N, this can be computationally intensive.
+        """
+        # Get residuals with entity and time structure
+        resid_df = self._prepare_residual_data()
+
+        # Create wide format: rows = time, columns = entities
+        resid_wide = resid_df.pivot(index='time', columns='entity', values='resid')
+
+        # Get dimensions
+        T = resid_wide.shape[0]  # Number of time periods
+        N = resid_wide.shape[1]  # Number of entities
+
+        if N < 2:
+            raise ValueError(
+                "Need at least 2 entities for cross-sectional dependence test"
+            )
+
+        # Compute pairwise correlations
+        # Use pairwise complete observations
+        correlations = []
+        n_pairs = 0
+
+        entity_list = list(resid_wide.columns)
+
+        for i in range(N):
+            for j in range(i + 1, N):
+                entity_i = entity_list[i]
+                entity_j = entity_list[j]
+
+                # Get residuals for this pair (drop NaN)
+                resid_i = resid_wide[entity_i].dropna()
+                resid_j = resid_wide[entity_j].dropna()
+
+                # Find common time periods
+                common_times = resid_i.index.intersection(resid_j.index)
+
+                if len(common_times) >= 3:  # Need at least 3 obs for correlation
+                    resid_i_common = resid_i.loc[common_times]
+                    resid_j_common = resid_j.loc[common_times]
+
+                    # Compute correlation
+                    rho_ij = np.corrcoef(resid_i_common, resid_j_common)[0, 1]
+
+                    # Handle potential NaN from constant series
+                    if not np.isnan(rho_ij):
+                        correlations.append(rho_ij)
+                        n_pairs += 1
+
+        if n_pairs == 0:
+            raise ValueError(
+                "No valid pairwise correlations could be computed. "
+                "Check for constant residuals or insufficient data."
+            )
+
+        # Compute LM statistic
+        # LM = T * sum(rho_ij²)
+        correlations = np.array(correlations)
+        lm_stat = T * np.sum(correlations ** 2)
+
+        # Degrees of freedom = number of pairs
+        # For complete data: N(N-1)/2
+        # For incomplete data: actual number of pairs
+        df = n_pairs
+
+        # P-value from chi-squared distribution
+        pvalue = 1 - stats.chi2.cdf(lm_stat, df)
+
+        # Metadata
+        mean_abs_corr = np.mean(np.abs(correlations))
+        max_abs_corr = np.max(np.abs(correlations))
+        positive_corrs = np.sum(correlations > 0)
+        negative_corrs = np.sum(correlations < 0)
+
+        metadata = {
+            'n_entities': int(N),
+            'n_time_periods': int(T),
+            'n_pairs': int(n_pairs),
+            'n_pairs_expected': int(N * (N - 1) // 2),
+            'mean_abs_correlation': float(mean_abs_corr),
+            'max_abs_correlation': float(max_abs_corr),
+            'n_positive_correlations': int(positive_corrs),
+            'n_negative_correlations': int(negative_corrs),
+            'warning': (
+                'Test may be over-sized for large N. '
+                'Consider Pesaran CD test if N > 30.'
+                if N > 30 else None
+            )
+        }
+
+        result = ValidationTestResult(
+            test_name="Breusch-Pagan LM Test for Cross-Sectional Dependence",
+            statistic=lm_stat,
+            pvalue=pvalue,
+            null_hypothesis="No cross-sectional dependence (residuals independent across entities)",
+            alternative_hypothesis="Cross-sectional dependence present",
+            alpha=alpha,
+            df=df,
+            metadata=metadata
+        )
+
+        return result
+
+    def _prepare_residual_data(self) -> pd.DataFrame:
+        """
+        Prepare residual data with entity and time identifiers.
+
+        Returns
+        -------
+        pd.DataFrame
+            DataFrame with columns: entity, time, resid
+        """
+        if hasattr(self.results, 'entity_index') and hasattr(self.results, 'time_index'):
+            resid_flat = self.resid.ravel() if hasattr(self.resid, 'ravel') else self.resid
+
+            resid_df = pd.DataFrame({
+                'entity': self.results.entity_index,
+                'time': self.results.time_index,
+                'resid': resid_flat
+            })
+
+            return resid_df
+        else:
+            raise AttributeError(
+                "Results object must have 'entity_index' and 'time_index' attributes"
+            )
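
The LM statistic above reduces to a few NumPy operations on a balanced T x N residual matrix. A minimal standalone sketch (synthetic residuals, illustrative names only, not the panelbox API; the class above additionally handles unbalanced panels via pairwise-complete observations):

import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
T, N = 20, 5                                   # time periods, entities (toy sizes)
resid = rng.standard_normal((T, N))            # T x N residual matrix, balanced panel

corr = np.corrcoef(resid, rowvar=False)        # N x N pairwise correlation matrix
upper = np.triu_indices(N, k=1)                # the N(N-1)/2 pairs with i < j
lm_stat = T * np.sum(corr[upper] ** 2)         # LM = T * sum_{i<j} rho_ij^2
df = N * (N - 1) // 2
pvalue = stats.chi2.sf(lm_stat, df)            # upper-tail chi-squared p-value
print(lm_stat, df, pvalue)
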
@@ -0,0 +1,297 @@
+"""
+Frees test for cross-sectional dependence in panel data.
+
+References
+----------
+Frees, E. W. (1995). Assessing Cross-Sectional Correlation in Panel Data.
+Journal of Econometrics, 69(2), 393-414.
+
+Frees, E. W. (2004). Longitudinal and Panel Data: Analysis and Applications
+in the Social Sciences. Cambridge University Press.
+"""
+
+import numpy as np
+import pandas as pd
+from scipy import stats
+
+from panelbox.validation.base import ValidationTest, ValidationTestResult
+
+
+class FreesTest(ValidationTest):
+    """
+    Frees test for cross-sectional dependence in panel data.
+
+    This is a non-parametric test that uses a Friedman-type rank
+    statistic to test for cross-sectional dependence.
+
+    H0: No cross-sectional dependence (residuals independent across entities)
+    H1: Cross-sectional dependence present
+
+    The test is robust to non-normality and heteroskedasticity.
+
+    Notes
+    -----
+    The test statistic is the average squared Spearman rank correlation:
+
+        Q_F = sum_{i<j} R_ij² / [N(N-1)/2]
+
+    where R_ij is the Spearman rank correlation between residuals
+    of entities i and j.
+
+    Under H0, Q_F follows an approximate distribution that can be
+    compared to critical values tabulated by Frees (1995, 2004).
+
+    For large samples, an asymptotic approximation is used: Q_F,
+    standardized by its mean and variance under H0, is approximately normal.
+
+    Advantages over the Breusch-Pagan LM test:
+    - Non-parametric (does not require normality)
+    - More robust to outliers
+    - Better size properties for moderate N
+
+    Examples
+    --------
+    >>> from panelbox.models.static.pooled_ols import PooledOLS
+    >>> model = PooledOLS("y ~ x1 + x2", data, "entity", "time")
+    >>> results = model.fit()
+    >>>
+    >>> from panelbox.validation.cross_sectional_dependence.frees import FreesTest
+    >>> test = FreesTest(results)
+    >>> result = test.run()
+    >>> print(result)
+    """
+
+    def __init__(self, results: 'PanelResults'):
+        """
+        Initialize Frees test.
+
+        Parameters
+        ----------
+        results : PanelResults
+            Results from panel model estimation
+        """
+        super().__init__(results)
+
+    def run(self, alpha: float = 0.05) -> ValidationTestResult:
+        """
+        Run Frees test for cross-sectional dependence.
+
+        Parameters
+        ----------
+        alpha : float, default=0.05
+            Significance level
+
+        Returns
+        -------
+        ValidationTestResult
+            Test results
+
+        Notes
+        -----
+        The test uses asymptotic approximation for p-values.
+        Critical values from Frees (1995) are also provided in metadata
+        for reference.
+        """
+        # Get residuals with entity and time structure
+        resid_df = self._prepare_residual_data()
+
+        # Create wide format: rows = time, columns = entities
+        resid_wide = resid_df.pivot(index='time', columns='entity', values='resid')
+
+        # Get dimensions
+        T = resid_wide.shape[0]  # Number of time periods
+        N = resid_wide.shape[1]  # Number of entities
+
+        if N < 2:
+            raise ValueError(
+                "Need at least 2 entities for cross-sectional dependence test"
+            )
+
+        if T < 3:
+            raise ValueError(
+                "Need at least 3 time periods for Frees test"
+            )
+
+        # Compute pairwise Spearman correlations
+        rank_correlations = []
+        n_pairs = 0
+
+        entity_list = list(resid_wide.columns)
+
+        for i in range(N):
+            for j in range(i + 1, N):
+                entity_i = entity_list[i]
+                entity_j = entity_list[j]
+
+                # Get residuals for this pair (drop NaN)
+                resid_i = resid_wide[entity_i].dropna()
+                resid_j = resid_wide[entity_j].dropna()
+
+                # Find common time periods
+                common_times = resid_i.index.intersection(resid_j.index)
+
+                if len(common_times) >= 3:  # Need at least 3 obs for rank correlation
+                    resid_i_common = resid_i.loc[common_times].values
+                    resid_j_common = resid_j.loc[common_times].values
+
+                    # Compute Spearman rank correlation
+                    # This is robust to outliers and non-normality
+                    rho_rank, _ = stats.spearmanr(resid_i_common, resid_j_common)
+
+                    # Handle potential NaN from constant series
+                    if not np.isnan(rho_rank):
+                        rank_correlations.append(rho_rank)
+                        n_pairs += 1
+
+        if n_pairs == 0:
+            raise ValueError(
+                "No valid pairwise rank correlations could be computed. "
+                "Check for constant residuals or insufficient data."
+            )
+
+        # Compute Frees statistic
+        # Q_F = mean of squared rank correlations
+        rank_correlations = np.array(rank_correlations)
+        q_frees = np.mean(rank_correlations ** 2)
+
+        # Asymptotic distribution
+        # Under H0: E[Q_F] = 1/(T-1)
+        # Var[Q_F] ≈ 2(T-3) / [(T+1)(T-1)²]
+        expected_qf = 1 / (T - 1)
+        var_qf = 2 * (T - 3) / ((T + 1) * (T - 1) ** 2)
+        se_qf = np.sqrt(var_qf / n_pairs)  # SE of mean
+
+        # Test statistic: standardized Q_F
+        if se_qf > 0:
+            z_stat = (q_frees - expected_qf) / se_qf
+        else:
+            z_stat = 0.0
+
+        # P-value (two-sided)
+        pvalue = 2 * (1 - stats.norm.cdf(abs(z_stat)))
+
+        # Critical values from Frees (1995) Table 1
+        # These are for balanced panels at alpha=0.05
+        critical_values = self._get_critical_values(T, N)
+
+        # Simple interpretation
+        interpretation = (
+            "Reject H0 (cross-sectional dependence detected)"
+            if q_frees > critical_values.get('alpha_0.05', float('inf'))
+            else "Do not reject H0 (no evidence of cross-sectional dependence)"
+        )
+
+        # Metadata
+        mean_abs_rank_corr = np.mean(np.abs(rank_correlations))
+        max_abs_rank_corr = np.max(np.abs(rank_correlations))
+
+        metadata = {
+            'q_frees_statistic': float(q_frees),
+            'z_statistic': float(z_stat),
+            'expected_qf_under_h0': float(expected_qf),
+            'n_entities': int(N),
+            'n_time_periods': int(T),
+            'n_pairs': int(n_pairs),
+            'mean_abs_rank_correlation': float(mean_abs_rank_corr),
+            'max_abs_rank_correlation': float(max_abs_rank_corr),
+            'critical_values': critical_values,
+            'interpretation': interpretation,
+            'note': (
+                'Frees test is non-parametric and robust to non-normality. '
+                'Critical values are approximate for unbalanced panels.'
+            )
+        }
+
+        result = ValidationTestResult(
+            test_name="Frees Test for Cross-Sectional Dependence",
+            statistic=z_stat,
+            pvalue=pvalue,
+            null_hypothesis="No cross-sectional dependence (residuals independent across entities)",
+            alternative_hypothesis="Cross-sectional dependence present",
+            alpha=alpha,
+            df=None,  # Non-parametric test
+            metadata=metadata
+        )
+
+        return result
+
+    def _get_critical_values(self, T, N):
+        """
+        Get critical values from Frees (1995) Table 1.
+
+        Parameters
+        ----------
+        T : int
+            Number of time periods
+        N : int
+            Number of entities
+
+        Returns
+        -------
+        dict
+            Critical values at different significance levels
+
+        Notes
+        -----
+        These are approximate critical values for balanced panels.
+        Interpolation is used for T values not in the table.
+        """
+        # Approximate critical values from Frees (1995, 2004)
+        # Q_F critical values at alpha = 0.10, 0.05, 0.01
+        # Rows: T, Columns: significance levels
+
+        # For small T, use tabulated values
+        if T <= 5:
+            cv_0_10 = 0.3000  # Very approximate
+            cv_0_05 = 0.4000
+            cv_0_01 = 0.6000
+        elif T <= 10:
+            cv_0_10 = 0.1429
+            cv_0_05 = 0.2000
+            cv_0_01 = 0.3000
+        elif T <= 20:
+            cv_0_10 = 0.0800
+            cv_0_05 = 0.1100
+            cv_0_01 = 0.1700
+        elif T <= 30:
+            cv_0_10 = 0.0543
+            cv_0_05 = 0.0754
+            cv_0_01 = 0.1170
+        else:
+            # Asymptotic approximation
+            cv_0_10 = 1.28 * np.sqrt(2 * (T - 3) / ((T + 1) * (T - 1) ** 2)) + 1 / (T - 1)
+            cv_0_05 = 1.96 * np.sqrt(2 * (T - 3) / ((T + 1) * (T - 1) ** 2)) + 1 / (T - 1)
+            cv_0_01 = 2.58 * np.sqrt(2 * (T - 3) / ((T + 1) * (T - 1) ** 2)) + 1 / (T - 1)
+
+        return {
+            'alpha_0.10': cv_0_10,
+            'alpha_0.05': cv_0_05,
+            'alpha_0.01': cv_0_01,
+            'T': T,
+            'N': N,
+            'note': 'Approximate critical values from Frees (1995)'
+        }
+
+    def _prepare_residual_data(self) -> pd.DataFrame:
+        """
+        Prepare residual data with entity and time identifiers.
+
+        Returns
+        -------
+        pd.DataFrame
+            DataFrame with columns: entity, time, resid
+        """
+        if hasattr(self.results, 'entity_index') and hasattr(self.results, 'time_index'):
+            resid_flat = self.resid.ravel() if hasattr(self.resid, 'ravel') else self.resid
+
+            resid_df = pd.DataFrame({
+                'entity': self.results.entity_index,
+                'time': self.results.time_index,
+                'resid': resid_flat
+            })
+
+            return resid_df
+        else:
+            raise AttributeError(
+                "Results object must have 'entity_index' and 'time_index' attributes"
+            )
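
For the Frees statistic, the same kind of standalone sketch (synthetic balanced panel, illustrative names, not the panelbox API) mirrors the standardization used in run() above: Q_F is the mean squared Spearman rank correlation, centered at 1/(T-1) and scaled by the variance approximation divided by the number of pairs.

import numpy as np
from scipy import stats
from itertools import combinations

rng = np.random.default_rng(1)
T, N = 15, 6
resid = rng.standard_normal((T, N))            # T x N residuals, balanced panel

rank_corrs = []
for i, j in combinations(range(N), 2):
    rho, _ = stats.spearmanr(resid[:, i], resid[:, j])  # Spearman rank correlation
    rank_corrs.append(rho)

q_frees = np.mean(np.square(rank_corrs))       # mean squared rank correlation
mean_h0 = 1 / (T - 1)                          # E[Q_F] under H0, as in run()
var_h0 = 2 * (T - 3) / ((T + 1) * (T - 1) ** 2)
n_pairs = N * (N - 1) // 2
z_stat = (q_frees - mean_h0) / np.sqrt(var_h0 / n_pairs)
pvalue = 2 * stats.norm.sf(abs(z_stat))        # two-sided p-value
print(q_frees, z_stat, pvalue)
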
@@ -0,0 +1,188 @@
+"""
+Pesaran CD test for cross-sectional dependence in panel data.
+
+References
+----------
+Pesaran, M. H. (2004). General diagnostic tests for cross section dependence
+in panels. University of Cambridge, Faculty of Economics, Cambridge Working
+Papers in Economics No. 0435.
+
+Stata command: xtcd
+"""
+
+import numpy as np
+import pandas as pd
+from scipy import stats
+from itertools import combinations
+
+from panelbox.validation.base import ValidationTest, ValidationTestResult
+
+
+class PesaranCDTest(ValidationTest):
+    """
+    Pesaran CD test for cross-sectional dependence.
+
+    Tests the null hypothesis of cross-sectional independence against the
+    alternative of cross-sectional dependence.
+
+    The test is based on the average of pairwise correlation coefficients
+    of the residuals.
+
+    H0: No cross-sectional dependence (residuals are independent across entities)
+    H1: Cross-sectional dependence present
+
+    Notes
+    -----
+    The test statistic is asymptotically distributed as N(0,1) under the null
+    hypothesis. The test performs well both when N is large and T is small,
+    and when N and T are both large.
+
+    The test requires at least 3 time periods and works best when N is
+    reasonably large.
+
+    Examples
+    --------
+    >>> from panelbox.models.static.fixed_effects import FixedEffects
+    >>> fe = FixedEffects("y ~ x1 + x2", data, "entity", "time")
+    >>> results = fe.fit()
+    >>>
+    >>> from panelbox.validation.cross_sectional_dependence.pesaran_cd import PesaranCDTest
+    >>> test = PesaranCDTest(results)
+    >>> result = test.run()
+    >>> print(result)
+    """
+
+    def __init__(self, results: 'PanelResults'):
+        """
+        Initialize Pesaran CD test.
+
+        Parameters
+        ----------
+        results : PanelResults
+            Results from panel model estimation
+        """
+        super().__init__(results)
+
+    def run(self, alpha: float = 0.05) -> ValidationTestResult:
+        """
+        Run Pesaran CD test for cross-sectional dependence.
+
+        Parameters
+        ----------
+        alpha : float, default=0.05
+            Significance level
+
+        Returns
+        -------
+        ValidationTestResult
+            Test results
+
+        Raises
+        ------
+        ValueError
+            If required data indices are not available or T < 3
+        """
+        # Get residuals with entity and time information
+        resid_df = self._prepare_residual_data()
+
+        # Reshape residuals to wide format (entities as columns, time as rows)
+        resid_wide = resid_df.pivot(index='time', columns='entity', values='resid')
+
+        # Check minimum time periods
+        T = len(resid_wide)
+        if T < 3:
+            raise ValueError(
+                f"Pesaran CD test requires at least 3 time periods. Found: {T}"
+            )
+
+        N = len(resid_wide.columns)
+
+        # Compute pairwise correlations
+        correlations = []
+        T_ij_list = []  # Effective sample size for each pair
+
+        for i, j in combinations(range(N), 2):
+            # Get residuals for entities i and j
+            e_i = resid_wide.iloc[:, i]
+            e_j = resid_wide.iloc[:, j]
+
+            # Drop missing values for this pair
+            valid = ~(e_i.isna() | e_j.isna())
+            e_i_valid = e_i[valid]
+            e_j_valid = e_j[valid]
+
+            T_ij = len(e_i_valid)
+
+            if T_ij >= 3:  # Need at least 3 observations to compute correlation
+                # Correlation coefficient
+                rho_ij = np.corrcoef(e_i_valid, e_j_valid)[0, 1]
+                correlations.append(rho_ij)
+                T_ij_list.append(T_ij)
+
+        if len(correlations) == 0:
+            raise ValueError("No valid pairwise correlations could be computed")
+
+        # Pesaran CD statistic
+        # CD = sqrt(2T / (N(N-1))) * sum(rho_ij)
+        rho_sum = np.sum(correlations)
+
+        # Use average T for unbalanced panels
+        T_avg = np.mean(T_ij_list) if len(T_ij_list) > 0 else T
+
+        cd_stat = np.sqrt(2 * T_avg / (N * (N - 1))) * rho_sum
+
+        # Under H0, CD ~ N(0,1)
+        pvalue = 2 * (1 - stats.norm.cdf(np.abs(cd_stat)))
+
+        # Average absolute correlation
+        avg_abs_corr = np.mean(np.abs(correlations))
+
+        # Metadata
+        metadata = {
+            'n_entities': N,
+            'n_time_periods': T,
+            'n_pairs': len(correlations),
+            'avg_correlation': np.mean(correlations),
+            'avg_abs_correlation': avg_abs_corr,
+            'max_abs_correlation': np.max(np.abs(correlations)),
+            'min_correlation': np.min(correlations),
+            'max_correlation': np.max(correlations)
+        }
+
+        result = ValidationTestResult(
+            test_name="Pesaran CD Test for Cross-Sectional Dependence",
+            statistic=cd_stat,
+            pvalue=pvalue,
+            null_hypothesis="No cross-sectional dependence",
+            alternative_hypothesis="Cross-sectional dependence present",
+            alpha=alpha,
+            df=None,
+            metadata=metadata
+        )
+
+        return result
+
+    def _prepare_residual_data(self) -> pd.DataFrame:
+        """
+        Prepare residual data with entity and time identifiers.
+
+        Returns
+        -------
+        pd.DataFrame
+            DataFrame with columns: entity, time, resid
+        """
+        if hasattr(self.results, 'entity_index') and hasattr(self.results, 'time_index'):
+            # Ensure resid is 1D
+            resid_flat = self.resid.ravel() if hasattr(self.resid, 'ravel') else self.resid
+
+            resid_df = pd.DataFrame({
+                'entity': self.results.entity_index,
+                'time': self.results.time_index,
+                'resid': resid_flat
+            })
+        else:
+            raise AttributeError(
+                "Results object must have 'entity_index' and 'time_index' attributes. "
+                "Please ensure your model stores these during estimation."
+            )
+
+        return resid_df
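
The CD statistic likewise follows directly from the formula in the comment above, CD = sqrt(2T / (N(N-1))) * sum_{i<j} rho_ij, compared against N(0,1). A minimal standalone sketch for a balanced panel (synthetic residuals, illustrative names only, not the panelbox API):

import numpy as np
from scipy import stats

rng = np.random.default_rng(2)
T, N = 25, 8
resid = rng.standard_normal((T, N))            # T x N residual matrix, balanced panel

corr = np.corrcoef(resid, rowvar=False)        # N x N pairwise correlations
rho_sum = corr[np.triu_indices(N, k=1)].sum()  # sum over the N(N-1)/2 pairs
cd_stat = np.sqrt(2 * T / (N * (N - 1))) * rho_sum
pvalue = 2 * stats.norm.sf(abs(cd_stat))       # CD ~ N(0,1) under H0
print(cd_stat, pvalue)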