panelbox 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. panelbox/__init__.py +67 -0
  2. panelbox/__version__.py +14 -0
  3. panelbox/cli/__init__.py +0 -0
  4. panelbox/cli/{commands}/__init__.py +0 -0
  5. panelbox/core/__init__.py +0 -0
  6. panelbox/core/base_model.py +164 -0
  7. panelbox/core/formula_parser.py +318 -0
  8. panelbox/core/panel_data.py +387 -0
  9. panelbox/core/results.py +366 -0
  10. panelbox/datasets/__init__.py +0 -0
  11. panelbox/datasets/{data}/__init__.py +0 -0
  12. panelbox/gmm/__init__.py +65 -0
  13. panelbox/gmm/difference_gmm.py +645 -0
  14. panelbox/gmm/estimator.py +562 -0
  15. panelbox/gmm/instruments.py +580 -0
  16. panelbox/gmm/results.py +550 -0
  17. panelbox/gmm/system_gmm.py +621 -0
  18. panelbox/gmm/tests.py +535 -0
  19. panelbox/models/__init__.py +11 -0
  20. panelbox/models/dynamic/__init__.py +0 -0
  21. panelbox/models/iv/__init__.py +0 -0
  22. panelbox/models/static/__init__.py +13 -0
  23. panelbox/models/static/fixed_effects.py +516 -0
  24. panelbox/models/static/pooled_ols.py +298 -0
  25. panelbox/models/static/random_effects.py +512 -0
  26. panelbox/report/__init__.py +61 -0
  27. panelbox/report/asset_manager.py +410 -0
  28. panelbox/report/css_manager.py +472 -0
  29. panelbox/report/exporters/__init__.py +15 -0
  30. panelbox/report/exporters/html_exporter.py +440 -0
  31. panelbox/report/exporters/latex_exporter.py +510 -0
  32. panelbox/report/exporters/markdown_exporter.py +446 -0
  33. panelbox/report/renderers/__init__.py +11 -0
  34. panelbox/report/renderers/static/__init__.py +0 -0
  35. panelbox/report/renderers/static_validation_renderer.py +341 -0
  36. panelbox/report/report_manager.py +502 -0
  37. panelbox/report/template_manager.py +337 -0
  38. panelbox/report/transformers/__init__.py +0 -0
  39. panelbox/report/transformers/static/__init__.py +0 -0
  40. panelbox/report/validation_transformer.py +449 -0
  41. panelbox/standard_errors/__init__.py +0 -0
  42. panelbox/templates/__init__.py +0 -0
  43. panelbox/templates/assets/css/base_styles.css +382 -0
  44. panelbox/templates/assets/css/report_components.css +747 -0
  45. panelbox/templates/assets/js/tab-navigation.js +161 -0
  46. panelbox/templates/assets/js/utils.js +276 -0
  47. panelbox/templates/common/footer.html +24 -0
  48. panelbox/templates/common/header.html +44 -0
  49. panelbox/templates/common/meta.html +5 -0
  50. panelbox/templates/validation/interactive/index.html +272 -0
  51. panelbox/templates/validation/interactive/partials/charts.html +58 -0
  52. panelbox/templates/validation/interactive/partials/methodology.html +201 -0
  53. panelbox/templates/validation/interactive/partials/overview.html +146 -0
  54. panelbox/templates/validation/interactive/partials/recommendations.html +101 -0
  55. panelbox/templates/validation/interactive/partials/test_results.html +231 -0
  56. panelbox/utils/__init__.py +0 -0
  57. panelbox/utils/formatting.py +172 -0
  58. panelbox/utils/matrix_ops.py +233 -0
  59. panelbox/utils/statistical.py +173 -0
  60. panelbox/validation/__init__.py +58 -0
  61. panelbox/validation/base.py +175 -0
  62. panelbox/validation/cointegration/__init__.py +0 -0
  63. panelbox/validation/cross_sectional_dependence/__init__.py +13 -0
  64. panelbox/validation/cross_sectional_dependence/breusch_pagan_lm.py +222 -0
  65. panelbox/validation/cross_sectional_dependence/frees.py +297 -0
  66. panelbox/validation/cross_sectional_dependence/pesaran_cd.py +188 -0
  67. panelbox/validation/heteroskedasticity/__init__.py +13 -0
  68. panelbox/validation/heteroskedasticity/breusch_pagan.py +222 -0
  69. panelbox/validation/heteroskedasticity/modified_wald.py +172 -0
  70. panelbox/validation/heteroskedasticity/white.py +208 -0
  71. panelbox/validation/instruments/__init__.py +0 -0
  72. panelbox/validation/robustness/__init__.py +0 -0
  73. panelbox/validation/serial_correlation/__init__.py +13 -0
  74. panelbox/validation/serial_correlation/baltagi_wu.py +220 -0
  75. panelbox/validation/serial_correlation/breusch_godfrey.py +260 -0
  76. panelbox/validation/serial_correlation/wooldridge_ar.py +200 -0
  77. panelbox/validation/specification/__init__.py +16 -0
  78. panelbox/validation/specification/chow.py +273 -0
  79. panelbox/validation/specification/hausman.py +264 -0
  80. panelbox/validation/specification/mundlak.py +331 -0
  81. panelbox/validation/specification/reset.py +273 -0
  82. panelbox/validation/unit_root/__init__.py +0 -0
  83. panelbox/validation/validation_report.py +257 -0
  84. panelbox/validation/validation_suite.py +401 -0
  85. panelbox-0.2.0.dist-info/METADATA +337 -0
  86. panelbox-0.2.0.dist-info/RECORD +90 -0
  87. panelbox-0.2.0.dist-info/WHEEL +5 -0
  88. panelbox-0.2.0.dist-info/entry_points.txt +2 -0
  89. panelbox-0.2.0.dist-info/licenses/LICENSE +21 -0
  90. panelbox-0.2.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,220 @@
1
+ """
2
+ Baltagi-Wu LBI test for serial correlation in panel data.
3
+
4
+ LBI = Locally Best Invariant
5
+
6
+ References
7
+ ----------
8
+ Baltagi, B. H., & Wu, P. X. (1999). Unequally Spaced Panel Data Regressions
9
+ with AR(1) Disturbances. Econometric Theory, 15(6), 814-823.
10
+
11
+ Baltagi, B. H., & Li, Q. (1995). Testing AR(1) Against MA(1) Disturbances
12
+ in an Error Component Model. Journal of Econometrics, 68(1), 133-151.
13
+ """
14
+
15
+ import numpy as np
16
+ import pandas as pd
17
+ from scipy import stats
18
+
19
+ from panelbox.validation.base import ValidationTest, ValidationTestResult
20
+
21
+
22
class BaltagiWuTest(ValidationTest):
    """
    Baltagi-Wu LBI test for first-order serial correlation in panel data.

    This test is designed for unbalanced panels and tests for AR(1)
    serial correlation in the idiosyncratic errors.

    H0: No first-order serial correlation (rho = 0)
    H1: AR(1) serial correlation present (rho ≠ 0)

    The test is based on a modified Durbin-Watson statistic that
    accounts for unbalanced panel structure.

    Notes
    -----
    The test statistic is:

        LBI = sum_i sum_t (e_it - e_{i,t-1})² / sum_i sum_t e_it²

    Under H0, LBI ≈ 2 (similar to Durbin-Watson).
    LBI < 2 suggests positive autocorrelation.
    LBI > 2 suggests negative autocorrelation.

    The statistic is standardised as z = (LBI - 2) / SE(LBI) and compared
    with the standard normal distribution.

    NOTE(review): consecutive observations of an entity are treated as
    adjacent in time; gaps in the time index are not taken into account.

    Examples
    --------
    >>> from panelbox.models.static.fixed_effects import FixedEffects
    >>> fe = FixedEffects("y ~ x1 + x2", data, "entity", "time")
    >>> results = fe.fit()
    >>>
    >>> from panelbox.validation.serial_correlation.baltagi_wu import BaltagiWuTest
    >>> test = BaltagiWuTest(results)
    >>> result = test.run()
    >>> print(result)
    """

    def __init__(self, results: 'PanelResults'):
        """
        Initialize Baltagi-Wu test.

        Parameters
        ----------
        results : PanelResults
            Results from panel model estimation
        """
        super().__init__(results)

    def run(self, alpha: float = 0.05) -> ValidationTestResult:
        """
        Run Baltagi-Wu test for serial correlation.

        Parameters
        ----------
        alpha : float, default=0.05
            Significance level

        Returns
        -------
        ValidationTestResult
            Test results. ``statistic`` is the standardised z value; the raw
            LBI statistic and the panel dimensions are stored in ``metadata``.

        Raises
        ------
        ValueError
            If no entity has at least 2 observations (no difference can be
            formed), or if the sum of squared residuals is zero.
        AttributeError
            If the results object lacks ``entity_index`` / ``time_index``.

        Notes
        -----
        The transformation to a standard normal test statistic uses:

            z = (LBI - 2) / sqrt(var(LBI))

        with var(LBI) approximated by 4 * sum_i(1 / T_i) / N, where T_i is
        the number of observations of entity i and N the number of entities.
        NOTE(review): this variance is a heuristic approximation; confirm it
        against the reference before relying on the p-value.
        """
        # Residuals ordered so that shift(1) yields the previous period
        # of the same entity.
        resid_df = self._prepare_residual_data()
        resid_df = resid_df.sort_values(['entity', 'time'])

        # Lag and first difference within each entity; the first row of
        # every entity becomes NaN and is dropped below.
        resid_df['resid_lag'] = resid_df.groupby('entity')['resid'].shift(1)
        resid_df['resid_diff'] = resid_df['resid'] - resid_df['resid_lag']
        resid_df_clean = resid_df.dropna(subset=['resid_diff', 'resid_lag'])

        if len(resid_df_clean) == 0:
            raise ValueError(
                "No valid observations after computing differences. "
                "Ensure each entity has at least 2 time periods."
            )

        # LBI = sum(diff²) / sum(resid²); the denominator deliberately uses
        # all residuals, not only those that have a lag.
        numerator = np.sum(resid_df_clean['resid_diff'] ** 2)
        denominator = np.sum(resid_df['resid'] ** 2)

        if denominator == 0:
            raise ValueError("Sum of squared residuals is zero (perfect fit)")

        lbi_stat = numerator / denominator

        # Panel dimensions (also reported in the metadata).
        n_entities = resid_df['entity'].nunique()
        n_obs_total = len(resid_df)
        t_bar = n_obs_total / n_entities
        t_i = resid_df.groupby('entity').size().values

        # Approximate variance for unbalanced panels:
        # Var(LBI) ≈ 4 * sum(1/T_i) / N. Strictly positive whenever at
        # least one entity exists, so the standard error cannot be zero.
        var_lbi = 4 * np.sum(1 / t_i) / n_entities
        se_lbi = np.sqrt(var_lbi)

        # Under H0 this is treated as approximately N(0, 1).
        z_stat = (lbi_stat - 2) / se_lbi

        # Two-sided p-value. The survival function keeps precision in the
        # tails, where 1 - cdf(|z|) would round to exactly zero.
        pvalue = 2 * stats.norm.sf(abs(z_stat))

        # Implied AR(1) coefficient: rho ≈ 1 - LBI/2
        rho_estimate = 1 - lbi_stat / 2

        metadata = {
            'lbi_statistic': float(lbi_stat),
            'z_statistic': float(z_stat),
            'rho_estimate': float(rho_estimate),
            'n_entities': int(n_entities),
            'n_obs_total': int(n_obs_total),
            'n_obs_used': len(resid_df_clean),
            'avg_time_periods': float(t_bar),
            'min_time_periods': int(t_i.min()),
            'max_time_periods': int(t_i.max()),
            'variance_lbi': float(var_lbi),
            'se_lbi': float(se_lbi),
            'interpretation': (
                'LBI < 2: positive autocorrelation, '
                'LBI ≈ 2: no autocorrelation, '
                'LBI > 2: negative autocorrelation'
            )
        }

        result = ValidationTestResult(
            test_name="Baltagi-Wu LBI Test for Serial Correlation",
            statistic=z_stat,
            pvalue=pvalue,
            null_hypothesis="No first-order serial correlation",
            alternative_hypothesis="First-order serial correlation present",
            alpha=alpha,
            df=None,  # Asymptotic test, no df
            metadata=metadata
        )

        return result

    def _prepare_residual_data(self) -> pd.DataFrame:
        """
        Prepare residual data with entity and time identifiers.

        Returns
        -------
        pd.DataFrame
            DataFrame with columns: entity, time, resid (one row per
            residual, default integer index).

        Raises
        ------
        AttributeError
            If the results object has no ``entity_index`` or ``time_index``.
        """
        if not (hasattr(self.results, 'entity_index')
                and hasattr(self.results, 'time_index')):
            raise AttributeError(
                "Results object must have 'entity_index' and 'time_index' attributes"
            )

        # Coerce everything to plain 1-D arrays so the columns are combined
        # by position; pandas objects carrying their own index labels would
        # otherwise be aligned by label.
        return pd.DataFrame({
            'entity': np.asarray(self.results.entity_index),
            'time': np.asarray(self.results.time_index),
            'resid': np.asarray(self.resid).ravel()
        })
@@ -0,0 +1,260 @@
1
+ """
2
+ Breusch-Godfrey LM test for serial correlation in panel data.
3
+
4
+ References
5
+ ----------
6
+ Breusch, T. S. (1978). Testing for autocorrelation in dynamic linear models.
7
+ Australian Economic Papers, 17(31), 334-355.
8
+
9
+ Godfrey, L. G. (1978). Testing against general autoregressive and moving average
10
+ error models when the regressors include lagged dependent variables.
11
+ Econometrica, 46(6), 1293-1301.
12
+ """
13
+
14
+ import numpy as np
15
+ import pandas as pd
16
+ from scipy import stats
17
+
18
+ from panelbox.validation.base import ValidationTest, ValidationTestResult
19
+
20
+
21
class BreuschGodfreyTest(ValidationTest):
    """
    Breusch-Godfrey LM test for serial correlation.

    Tests the null hypothesis of no serial correlation against the
    alternative of AR(p) serial correlation in the errors.

    H0: No serial correlation
    H1: AR(p) serial correlation present

    The test regresses the residuals on lagged residuals and the original
    regressors, then tests if the lagged residuals are jointly significant.

    Notes
    -----
    Unlike the Durbin-Watson test, the BG test:
    - Can test for higher-order serial correlation
    - Is valid when regressors include lagged dependent variables
    - Provides an LM test statistic ~ Chi2(p)

    Lags are built within each entity, so residuals never borrow a lag
    from a different entity.

    Examples
    --------
    >>> from panelbox.models.static.fixed_effects import FixedEffects
    >>> fe = FixedEffects("y ~ x1 + x2", data, "entity", "time")
    >>> results = fe.fit()
    >>>
    >>> from panelbox.validation.serial_correlation.breusch_godfrey import BreuschGodfreyTest
    >>> test = BreuschGodfreyTest(results)
    >>> result = test.run(lags=1)  # Test for AR(1)
    >>> print(result)
    """

    def __init__(self, results: 'PanelResults'):
        """
        Initialize Breusch-Godfrey test.

        Parameters
        ----------
        results : PanelResults
            Results from panel model estimation
        """
        super().__init__(results)

    def run(self, lags: int = 1, alpha: float = 0.05) -> ValidationTestResult:
        """
        Run Breusch-Godfrey LM test for serial correlation.

        Parameters
        ----------
        lags : int, default=1
            Number of lags to test (order of AR process)
        alpha : float, default=0.05
            Significance level

        Returns
        -------
        ValidationTestResult
            Test results

        Raises
        ------
        ValueError
            If ``lags < 1``, if the design matrix is unavailable or its row
            count differs from the number of residuals, or if no observation
            is left after building the lags.
        AttributeError
            If the results object lacks ``entity_index`` / ``time_index``.

        Notes
        -----
        The test procedure:
        1. Obtain residuals from original model
        2. Regress residuals on lagged residuals (up to lag p) and original X
        3. Compute LM = N * R² from this auxiliary regression, where N is
           the number of entities
        4. Compare to Chi2(p) distribution
        """
        if lags < 1:
            raise ValueError(f"lags must be >= 1, got {lags}")

        resid_df = self._prepare_residual_data()

        X = self._get_design_matrix()
        if X is None:
            raise ValueError(
                "Design matrix not available for Breusch-Godfrey test"
            )

        X = np.asarray(X)
        if X.ndim == 1:
            X = X.reshape(-1, 1)

        # Row i of X is assumed to belong to residual i (both in the model's
        # original observation order). Without equal row counts there is no
        # defensible way to pair them, so fail loudly instead of truncating.
        n_rows = len(resid_df)
        if X.shape[0] != n_rows:
            raise ValueError(
                f"Design matrix has {X.shape[0]} rows but there are {n_rows} "
                "residuals; cannot align regressors with residuals"
            )

        # Remember each observation's original position before reordering,
        # so the matching rows of X can be picked after sorting and after
        # dropping the observations that have no lag.
        resid_df['_row'] = np.arange(n_rows)
        resid_df = resid_df.sort_values(['entity', 'time'])

        lag_cols = [f'resid_lag{i}' for i in range(1, lags + 1)]
        grouped_resid = resid_df.groupby('entity')['resid']
        for lag, col in enumerate(lag_cols, start=1):
            resid_df[col] = grouped_resid.shift(lag)

        # The first `lags` observations of every entity have no complete
        # lag set; entities with at most `lags` observations vanish entirely.
        resid_df = resid_df.dropna(subset=lag_cols)

        if len(resid_df) == 0:
            raise ValueError("No valid observations after creating lags")

        resid = resid_df['resid'].values
        X_lags = resid_df[lag_cols].values
        X_matched = X[resid_df['_row'].values, :]

        # Auxiliary regression: resid on [X, resid_lag1, ..., resid_lagp]
        X_aug = np.column_stack([X_matched, X_lags])

        # OLS via the normal equations; fall back to least squares when the
        # cross-product matrix is singular.
        try:
            XtX = X_aug.T @ X_aug
            Xty = X_aug.T @ resid
            beta_aux = np.linalg.solve(XtX, Xty)
        except np.linalg.LinAlgError:
            beta_aux = np.linalg.lstsq(X_aug, resid, rcond=None)[0]

        fitted_aux = X_aug @ beta_aux

        # R² from the explained sum of squares, clipped to [0, 1].
        mean_resid = np.mean(resid)
        SST = np.sum((resid - mean_resid) ** 2)
        SSE = np.sum((fitted_aux - mean_resid) ** 2)
        R2_aux = SSE / SST if SST > 0 else 0.0
        R2_aux = np.clip(R2_aux, 0.0, 1.0)

        # LM statistic scaled by the number of cross-sectional units that
        # remain in the auxiliary regression.
        # NOTE(review): the textbook Breusch-Godfrey statistic scales R² by
        # the number of observations in the auxiliary regression; confirm
        # that scaling by the number of entities is really intended.
        n_entities = resid_df['entity'].nunique()
        lm_stat = n_entities * R2_aux

        # Degrees of freedom = number of lags
        df = lags

        # Upper-tail probability; sf() stays accurate where 1 - cdf() would
        # round to exactly zero.
        pvalue = stats.chi2.sf(lm_stat, df)

        n_obs = len(resid)
        metadata = {
            'lags': lags,
            'R2_auxiliary': R2_aux,
            'n_obs_auxiliary': n_obs,
            'n_entities': n_entities,
            'note': 'Panel BG test uses LM = N * R² where N = number of entities'
        }

        result = ValidationTestResult(
            test_name=f"Breusch-Godfrey LM Test for Serial Correlation (AR({lags}))",
            statistic=lm_stat,
            pvalue=pvalue,
            null_hypothesis="No serial correlation",
            alternative_hypothesis=f"AR({lags}) serial correlation present",
            alpha=alpha,
            df=df,
            metadata=metadata
        )

        return result

    def _prepare_residual_data(self) -> pd.DataFrame:
        """
        Prepare residual data with entity and time identifiers.

        Returns
        -------
        pd.DataFrame
            DataFrame with columns: entity, time, resid (one row per
            residual, default integer index).

        Raises
        ------
        AttributeError
            If the results object has no ``entity_index`` or ``time_index``.
        """
        if not (hasattr(self.results, 'entity_index')
                and hasattr(self.results, 'time_index')):
            raise AttributeError(
                "Results object must have 'entity_index' and 'time_index' attributes"
            )

        # Plain arrays keep the columns paired by position even when the
        # results object stores pandas objects with their own index labels.
        return pd.DataFrame({
            'entity': np.asarray(self.results.entity_index),
            'time': np.asarray(self.results.time_index),
            'resid': np.asarray(self.resid).ravel()
        })

    def _get_design_matrix(self) -> np.ndarray:
        """
        Get the design matrix X by rebuilding it from the fitted model.

        Returns
        -------
        np.ndarray or None
            The regressor matrix, or None when the results object has no
            ``_model``, the model lacks ``formula_parser`` / ``data``, or
            rebuilding the matrices fails.
        """
        if not hasattr(self.results, '_model'):
            return None

        model = self.results._model

        if not (hasattr(model, 'formula_parser') and hasattr(model, 'data')):
            return None

        try:
            _, X = model.formula_parser.build_design_matrices(
                model.data.data,
                return_type='array'
            )
        except Exception:
            # Deliberate best effort: any failure is reported to the caller
            # as "no design matrix", which run() turns into a ValueError.
            return None

        return X
@@ -0,0 +1,200 @@
1
+ """
2
+ Wooldridge test for autocorrelation in panel data.
3
+
4
+ References
5
+ ----------
6
+ Wooldridge, J. M. (2002). Econometric Analysis of Cross Section and Panel Data.
7
+ MIT Press, Section 10.4.1.
8
+
9
+ Stata command: xtserial
10
+ """
11
+
12
+ import numpy as np
13
+ import pandas as pd
14
+ from scipy import stats
15
+
16
+ from panelbox.validation.base import ValidationTest, ValidationTestResult
17
+
18
+
19
class WooldridgeARTest(ValidationTest):
    """
    Wooldridge test for first-order autocorrelation in panel data.

    This test is specifically designed for fixed effects models and tests
    for AR(1) autocorrelation in the idiosyncratic errors.

    The test is based on regressing the first-differenced residuals on their
    own lag and testing if the coefficient equals -0.5 (which is the value
    under H0 of no serial correlation).

    Notes
    -----
    The test statistic is compared with an F(1, N-1) distribution under the
    null of no first-order serial correlation, where N is the number of
    entities.

    This test requires at least 3 time periods for every entity and at
    least 2 entities.

    NOTE(review): the standard error used here is the classical OLS one;
    it is not clustered by entity. Confirm whether a cluster-robust
    variance is wanted.

    Examples
    --------
    >>> from panelbox.models.static.fixed_effects import FixedEffects
    >>> fe = FixedEffects("y ~ x1 + x2", data, "entity", "time")
    >>> results = fe.fit()
    >>>
    >>> from panelbox.validation.serial_correlation.wooldridge_ar import WooldridgeARTest
    >>> test = WooldridgeARTest(results)
    >>> result = test.run()
    >>> print(result)
    """

    def __init__(self, results: 'PanelResults'):
        """
        Initialize Wooldridge AR test.

        Emits a warning when the model type does not contain
        'Fixed Effects'.

        Parameters
        ----------
        results : PanelResults
            Results from panel model estimation (preferably Fixed Effects)
        """
        super().__init__(results)

        # Check if model is suitable
        if 'Fixed Effects' not in self.model_type:
            import warnings
            warnings.warn(
                "Wooldridge test is designed for Fixed Effects models. "
                f"Current model: {self.model_type}",
                stacklevel=2  # point at the code that built the test
            )

    def run(self, alpha: float = 0.05) -> ValidationTestResult:
        """
        Run Wooldridge test for AR(1) autocorrelation.

        Parameters
        ----------
        alpha : float, default=0.05
            Significance level

        Returns
        -------
        ValidationTestResult
            Test results. ``statistic`` is the F statistic (squared t
            statistic) and ``df`` is ``(1, N - 1)``.

        Raises
        ------
        ValueError
            If any entity has fewer than 3 time periods, if no observation
            remains after differencing, if fewer than 2 entities remain, or
            if the lagged differenced residuals are all zero.
        AttributeError
            If the results object lacks ``entity_index`` / ``time_index``.
        """
        resid_df = self._prepare_residual_data()

        # Two observations per entity are lost (one to differencing, one to
        # lagging), so every entity needs at least three.
        min_T = resid_df.groupby('entity').size().min()
        if min_T < 3:
            raise ValueError(
                f"Wooldridge test requires at least 3 time periods. "
                f"Minimum found: {min_T}"
            )

        # First differences of the residuals and their lag, within entity.
        resid_df = resid_df.sort_values(['entity', 'time'])
        resid_df['resid_diff'] = resid_df.groupby('entity')['resid'].diff()
        resid_df['resid_diff_lag'] = resid_df.groupby('entity')['resid_diff'].shift(1)

        # Drop missing values (first two obs per entity are lost)
        resid_df = resid_df.dropna(subset=['resid_diff', 'resid_diff_lag'])

        if len(resid_df) == 0:
            raise ValueError("No valid observations after differencing")

        # The F reference distribution has N - 1 denominator degrees of
        # freedom, which is undefined for a single entity.
        n_entities = resid_df['entity'].nunique()
        if n_entities < 2:
            raise ValueError(
                "Wooldridge test requires at least 2 entities. "
                f"Found: {n_entities}"
            )

        # Regression without intercept: Δe_it on Δe_{i,t-1}
        y = resid_df['resid_diff'].values
        X = resid_df['resid_diff_lag'].values
        n = len(y)

        ss_x = np.sum(X * X)
        if ss_x == 0:
            raise ValueError(
                "Lagged differenced residuals are all zero; "
                "the auxiliary regression is not identified"
            )

        beta = np.sum(X * y) / ss_x

        # Classical standard error of the slope (one estimated parameter).
        resid_reg = y - beta * X
        s2 = np.sum(resid_reg ** 2) / (n - 1)
        se_beta = np.sqrt(s2 / ss_x)

        # Test H0: beta = -0.5 (no serial correlation)
        # Under H0, E[Δe_it * Δe_{i,t-1}] = -sigma²/2, so the coefficient
        # should be -0.5.
        t_stat = (beta - (-0.5)) / se_beta

        # F statistic (F = t²)
        f_stat = t_stat ** 2

        df_num = 1
        df_denom = n_entities - 1

        # Upper-tail probability; sf() stays accurate where 1 - cdf() would
        # round to exactly zero.
        pvalue = stats.f.sf(f_stat, df_num, df_denom)

        metadata = {
            'coefficient': beta,
            'std_error': se_beta,
            't_statistic': t_stat,
            'n_entities': n_entities,
            'n_obs_used': n
        }

        result = ValidationTestResult(
            test_name="Wooldridge Test for Autocorrelation",
            statistic=f_stat,
            pvalue=pvalue,
            null_hypothesis="No first-order autocorrelation",
            alternative_hypothesis="First-order autocorrelation present",
            alpha=alpha,
            df=(df_num, df_denom),
            metadata=metadata
        )

        return result

    def _prepare_residual_data(self) -> pd.DataFrame:
        """
        Prepare residual data with entity and time identifiers.

        Returns
        -------
        pd.DataFrame
            DataFrame with columns: entity, time, resid (one row per
            residual, default integer index).

        Raises
        ------
        AttributeError
            If the results object has no ``entity_index`` or ``time_index``.
        """
        if not (hasattr(self.results, 'entity_index')
                and hasattr(self.results, 'time_index')):
            raise AttributeError(
                "Results object must have 'entity_index' and 'time_index' attributes. "
                "Please ensure your model stores these during estimation."
            )

        # Plain 1-D arrays keep the three columns paired by position even
        # when the results object stores pandas objects with index labels.
        resid_df = pd.DataFrame({
            'entity': np.asarray(self.results.entity_index),
            'time': np.asarray(self.results.time_index),
            'resid': np.asarray(self.resid).ravel()
        })

        return resid_df
@@ -0,0 +1,16 @@
1
+ """
2
+ Specification tests for panel models.
3
+ """
4
+
5
+ from panelbox.validation.specification.hausman import HausmanTest, HausmanTestResult
6
+ from panelbox.validation.specification.mundlak import MundlakTest
7
+ from panelbox.validation.specification.reset import RESETTest
8
+ from panelbox.validation.specification.chow import ChowTest
9
+
10
+ __all__ = [
11
+ 'HausmanTest',
12
+ 'HausmanTestResult',
13
+ 'MundlakTest',
14
+ 'RESETTest',
15
+ 'ChowTest',
16
+ ]