panelbox 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. panelbox/__init__.py +67 -0
  2. panelbox/__version__.py +14 -0
  3. panelbox/cli/__init__.py +0 -0
  4. panelbox/cli/{commands}/__init__.py +0 -0
  5. panelbox/core/__init__.py +0 -0
  6. panelbox/core/base_model.py +164 -0
  7. panelbox/core/formula_parser.py +318 -0
  8. panelbox/core/panel_data.py +387 -0
  9. panelbox/core/results.py +366 -0
  10. panelbox/datasets/__init__.py +0 -0
  11. panelbox/datasets/{data}/__init__.py +0 -0
  12. panelbox/gmm/__init__.py +65 -0
  13. panelbox/gmm/difference_gmm.py +645 -0
  14. panelbox/gmm/estimator.py +562 -0
  15. panelbox/gmm/instruments.py +580 -0
  16. panelbox/gmm/results.py +550 -0
  17. panelbox/gmm/system_gmm.py +621 -0
  18. panelbox/gmm/tests.py +535 -0
  19. panelbox/models/__init__.py +11 -0
  20. panelbox/models/dynamic/__init__.py +0 -0
  21. panelbox/models/iv/__init__.py +0 -0
  22. panelbox/models/static/__init__.py +13 -0
  23. panelbox/models/static/fixed_effects.py +516 -0
  24. panelbox/models/static/pooled_ols.py +298 -0
  25. panelbox/models/static/random_effects.py +512 -0
  26. panelbox/report/__init__.py +61 -0
  27. panelbox/report/asset_manager.py +410 -0
  28. panelbox/report/css_manager.py +472 -0
  29. panelbox/report/exporters/__init__.py +15 -0
  30. panelbox/report/exporters/html_exporter.py +440 -0
  31. panelbox/report/exporters/latex_exporter.py +510 -0
  32. panelbox/report/exporters/markdown_exporter.py +446 -0
  33. panelbox/report/renderers/__init__.py +11 -0
  34. panelbox/report/renderers/static/__init__.py +0 -0
  35. panelbox/report/renderers/static_validation_renderer.py +341 -0
  36. panelbox/report/report_manager.py +502 -0
  37. panelbox/report/template_manager.py +337 -0
  38. panelbox/report/transformers/__init__.py +0 -0
  39. panelbox/report/transformers/static/__init__.py +0 -0
  40. panelbox/report/validation_transformer.py +449 -0
  41. panelbox/standard_errors/__init__.py +0 -0
  42. panelbox/templates/__init__.py +0 -0
  43. panelbox/templates/assets/css/base_styles.css +382 -0
  44. panelbox/templates/assets/css/report_components.css +747 -0
  45. panelbox/templates/assets/js/tab-navigation.js +161 -0
  46. panelbox/templates/assets/js/utils.js +276 -0
  47. panelbox/templates/common/footer.html +24 -0
  48. panelbox/templates/common/header.html +44 -0
  49. panelbox/templates/common/meta.html +5 -0
  50. panelbox/templates/validation/interactive/index.html +272 -0
  51. panelbox/templates/validation/interactive/partials/charts.html +58 -0
  52. panelbox/templates/validation/interactive/partials/methodology.html +201 -0
  53. panelbox/templates/validation/interactive/partials/overview.html +146 -0
  54. panelbox/templates/validation/interactive/partials/recommendations.html +101 -0
  55. panelbox/templates/validation/interactive/partials/test_results.html +231 -0
  56. panelbox/utils/__init__.py +0 -0
  57. panelbox/utils/formatting.py +172 -0
  58. panelbox/utils/matrix_ops.py +233 -0
  59. panelbox/utils/statistical.py +173 -0
  60. panelbox/validation/__init__.py +58 -0
  61. panelbox/validation/base.py +175 -0
  62. panelbox/validation/cointegration/__init__.py +0 -0
  63. panelbox/validation/cross_sectional_dependence/__init__.py +13 -0
  64. panelbox/validation/cross_sectional_dependence/breusch_pagan_lm.py +222 -0
  65. panelbox/validation/cross_sectional_dependence/frees.py +297 -0
  66. panelbox/validation/cross_sectional_dependence/pesaran_cd.py +188 -0
  67. panelbox/validation/heteroskedasticity/__init__.py +13 -0
  68. panelbox/validation/heteroskedasticity/breusch_pagan.py +222 -0
  69. panelbox/validation/heteroskedasticity/modified_wald.py +172 -0
  70. panelbox/validation/heteroskedasticity/white.py +208 -0
  71. panelbox/validation/instruments/__init__.py +0 -0
  72. panelbox/validation/robustness/__init__.py +0 -0
  73. panelbox/validation/serial_correlation/__init__.py +13 -0
  74. panelbox/validation/serial_correlation/baltagi_wu.py +220 -0
  75. panelbox/validation/serial_correlation/breusch_godfrey.py +260 -0
  76. panelbox/validation/serial_correlation/wooldridge_ar.py +200 -0
  77. panelbox/validation/specification/__init__.py +16 -0
  78. panelbox/validation/specification/chow.py +273 -0
  79. panelbox/validation/specification/hausman.py +264 -0
  80. panelbox/validation/specification/mundlak.py +331 -0
  81. panelbox/validation/specification/reset.py +273 -0
  82. panelbox/validation/unit_root/__init__.py +0 -0
  83. panelbox/validation/validation_report.py +257 -0
  84. panelbox/validation/validation_suite.py +401 -0
  85. panelbox-0.2.0.dist-info/METADATA +337 -0
  86. panelbox-0.2.0.dist-info/RECORD +90 -0
  87. panelbox-0.2.0.dist-info/WHEEL +5 -0
  88. panelbox-0.2.0.dist-info/entry_points.txt +2 -0
  89. panelbox-0.2.0.dist-info/licenses/LICENSE +21 -0
  90. panelbox-0.2.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,13 @@
1
+ """
2
+ Heteroskedasticity tests for panel models.
3
+ """
4
+
5
+ from panelbox.validation.heteroskedasticity.modified_wald import ModifiedWaldTest
6
+ from panelbox.validation.heteroskedasticity.breusch_pagan import BreuschPaganTest
7
+ from panelbox.validation.heteroskedasticity.white import WhiteTest
8
+
9
+ __all__ = [
10
+ 'ModifiedWaldTest',
11
+ 'BreuschPaganTest',
12
+ 'WhiteTest',
13
+ ]
@@ -0,0 +1,222 @@
1
+ """
2
+ Breusch-Pagan LM test for heteroskedasticity in panel data.
3
+
4
+ References
5
+ ----------
6
+ Breusch, T. S., & Pagan, A. R. (1979). A simple test for heteroscedasticity
7
+ and random coefficient variation. Econometrica, 47(5), 1287-1294.
8
+
9
+ Greene, W. H. (2018). Econometric Analysis (8th ed.). Pearson.
10
+ """
11
+
12
+ import numpy as np
13
+ import pandas as pd
14
+ from scipy import stats
15
+
16
+ from panelbox.validation.base import ValidationTest, ValidationTestResult
17
+
18
+
19
class BreuschPaganTest(ValidationTest):
    """
    Breusch-Pagan LM test for heteroskedasticity.

    Tests the null hypothesis that the error variance is constant
    (homoskedasticity) against the alternative that the variance
    is a function of the regressors.

    H0: sigma²_i = sigma² (homoskedasticity)
    H1: sigma²_i = h(X_i) (heteroskedasticity)

    The test regresses squared residuals on the original regressors
    (plus an intercept) and tests if the slope coefficients are jointly
    zero using an LM statistic.

    Notes
    -----
    The test statistic is n*R² from the auxiliary regression, which
    follows a chi-squared distribution with k degrees of freedom
    under the null hypothesis, where k is the number of linearly
    independent regressors (excluding the constant).

    Examples
    --------
    >>> from panelbox.models.static.pooled_ols import PooledOLS
    >>> model = PooledOLS("y ~ x1 + x2", data, "entity", "time")
    >>> results = model.fit()
    >>>
    >>> from panelbox.validation.heteroskedasticity.breusch_pagan import BreuschPaganTest
    >>> test = BreuschPaganTest(results)
    >>> result = test.run()
    >>> print(result)
    """

    def __init__(self, results: 'PanelResults'):
        """
        Initialize Breusch-Pagan test.

        Parameters
        ----------
        results : PanelResults
            Results from panel model estimation
        """
        super().__init__(results)

        # Store the original design matrix if the model cached one; it is
        # needed for the auxiliary regression in ``run``.
        model = getattr(results, '_model', None)
        self._X = getattr(model, '_X_orig', None)

    def run(self, alpha: float = 0.05) -> ValidationTestResult:
        """
        Run Breusch-Pagan LM test for heteroskedasticity.

        Parameters
        ----------
        alpha : float, default=0.05
            Significance level

        Returns
        -------
        ValidationTestResult
            Test results

        Raises
        ------
        ValueError
            If the design matrix is not available, is empty, or does not
            have one row per residual

        Notes
        -----
        The test procedure:
        1. Estimate the original model and obtain residuals e
        2. Compute squared residuals e²
        3. Regress e² on a constant and the original regressors X
        4. Compute LM statistic = n*R² from this auxiliary regression
        5. Compare to chi-squared(k) distribution
        """
        # Flatten so that an (n, 1) residual array behaves like a vector.
        resid_sq = np.asarray(self.resid, dtype=float).ravel() ** 2

        X = self._get_design_matrix()
        if X is None:
            raise ValueError(
                "Design matrix not available. "
                "Breusch-Pagan test requires access to the original regressors."
            )

        X = np.asarray(X, dtype=float)
        if X.ndim == 1:
            X = X.reshape(-1, 1)

        n = resid_sq.shape[0]
        if n == 0:
            raise ValueError("Breusch-Pagan test requires at least one observation.")
        if X.shape[0] != n:
            raise ValueError(
                f"Design matrix has {X.shape[0]} rows but there are {n} residuals."
            )

        k = X.shape[1]

        # The centered R² below is only meaningful when the auxiliary
        # regression contains an intercept, so add one unless X already has
        # a non-zero constant column.
        has_constant = any(
            np.allclose(X[:, j], X[0, j]) and not np.isclose(X[0, j], 0.0)
            for j in range(k)
        )
        Z = X if has_constant else np.column_stack([np.ones(n), X])

        # Least squares via lstsq: unlike solving the normal equations it
        # copes with rank-deficient designs and reports the numerical rank.
        beta_aux, _, rank, _ = np.linalg.lstsq(Z, resid_sq, rcond=None)
        fitted_aux = Z @ beta_aux

        # R² = explained sum of squares / total sum of squares.
        mean_resid_sq = resid_sq.mean()
        sst = np.sum((resid_sq - mean_resid_sq) ** 2)
        ess = np.sum((fitted_aux - mean_resid_sq) ** 2)

        # Clip to [0, 1] to absorb floating-point noise; this also
        # guarantees a non-negative LM statistic.
        R2_aux = float(np.clip(ess / sst, 0.0, 1.0)) if sst > 0 else 0.0
        lm_stat = n * R2_aux

        # Degrees of freedom = independent regressors excluding the constant
        # (at least 1 so the chi-squared distribution is defined).
        df = max(int(rank) - 1, 1)

        # Survival function keeps precision for very small p-values.
        pvalue = float(stats.chi2.sf(lm_stat, df))

        metadata = {
            'R2_auxiliary': R2_aux,
            'n_obs': n,
            'n_regressors': k
        }

        return ValidationTestResult(
            test_name="Breusch-Pagan LM Test for Heteroskedasticity",
            statistic=lm_stat,
            pvalue=pvalue,
            null_hypothesis="Homoskedasticity (constant error variance)",
            alternative_hypothesis="Heteroskedasticity (variance depends on regressors)",
            alpha=alpha,
            df=df,
            metadata=metadata
        )

    def _get_design_matrix(self) -> np.ndarray:
        """
        Get the design matrix X.

        Returns
        -------
        np.ndarray or None
            Design matrix, or None if not available
        """
        # Matrix captured at construction time, if any.
        if self._X is not None:
            return self._X

        # Otherwise try to rebuild it from the model's formula and data.
        model = getattr(self.results, '_model', None)
        if model is None:
            return None
        if not (hasattr(model, 'formula_parser') and hasattr(model, 'data')):
            return None

        try:
            _, X = model.formula_parser.build_design_matrices(
                model.data.data,
                return_type='array'
            )
        except Exception as exc:
            # Best effort: the caller reports the missing matrix, but the
            # underlying reason is surfaced instead of silently discarded.
            import warnings
            warnings.warn(
                f"Could not rebuild design matrix: {exc}",
                RuntimeWarning,
                stacklevel=2
            )
            return None

        return X
@@ -0,0 +1,172 @@
1
+ """
2
+ Modified Wald test for groupwise heteroskedasticity in fixed effects models.
3
+
4
+ References
5
+ ----------
6
+ Greene, W. H. (2000). Econometric Analysis (4th ed.). Prentice Hall.
7
+
8
+ Baum, C. F. (2001). Residual diagnostics for cross-section time series
9
+ regression models. The Stata Journal, 1(1), 101-104.
10
+
11
+ Stata command: xttest3
12
+ """
13
+
14
+ import numpy as np
15
+ import pandas as pd
16
+ from scipy import stats
17
+
18
+ from panelbox.validation.base import ValidationTest, ValidationTestResult
19
+
20
+
21
class ModifiedWaldTest(ValidationTest):
    """
    Modified Wald test for groupwise heteroskedasticity.

    Tests the null hypothesis that the error variance is the same across
    all cross-sectional units (entities) against the alternative that
    variances differ across groups.

    H0: sigma²_1 = sigma²_2 = ... = sigma²_N
    H1: sigma²_i ≠ sigma²_j for some i ≠ j

    This test is designed for fixed effects panel models.

    Notes
    -----
    Following Greene (2000) and Baum (2001), with e_it the residuals and
    T_i the number of observations of entity i:

        sigma²_i = (1 / T_i) * sum_t e_it²
        V_i      = (1 / T_i) * (1 / (T_i - 1)) * sum_t (e_it² - sigma²_i)²
        W        = sum_i (sigma²_i - sigma²)² / V_i

    where sigma² is the pooled residual variance (mean of all e_it²).
    Under the null hypothesis W follows a chi-squared distribution with
    N degrees of freedom, where N is the number of entities entering
    the sum.

    The formulas use entity-specific T_i, so unbalanced panels are
    supported. Entities with fewer than two observations, or whose V_i is
    zero, cannot contribute and are left out of the statistic.

    Examples
    --------
    >>> from panelbox.models.static.fixed_effects import FixedEffects
    >>> fe = FixedEffects("y ~ x1 + x2", data, "entity", "time")
    >>> results = fe.fit()
    >>>
    >>> from panelbox.validation.heteroskedasticity.modified_wald import ModifiedWaldTest
    >>> test = ModifiedWaldTest(results)
    >>> result = test.run()
    >>> print(result)
    """

    def __init__(self, results: 'PanelResults'):
        """
        Initialize Modified Wald test.

        Parameters
        ----------
        results : PanelResults
            Results from panel model estimation (preferably Fixed Effects)
        """
        super().__init__(results)

        # The test is meant for fixed effects residuals; warn (but do not
        # fail) when it is applied to another model type.
        if 'Fixed Effects' not in self.model_type:
            import warnings
            warnings.warn(
                "Modified Wald test is designed for Fixed Effects models. "
                f"Current model: {self.model_type}"
            )

    def run(self, alpha: float = 0.05) -> ValidationTestResult:
        """
        Run Modified Wald test for groupwise heteroskedasticity.

        Parameters
        ----------
        alpha : float, default=0.05
            Significance level

        Returns
        -------
        ValidationTestResult
            Test results. The statistic and p-value are NaN when no entity
            has enough observations to estimate V_i.

        Raises
        ------
        AttributeError
            If the results object does not expose ``entity_index``
        """
        resid_df = self._prepare_residual_data()
        entity = resid_df['entity']
        resid_sq = resid_df['resid'] ** 2

        by_entity = resid_sq.groupby(entity)
        entity_counts = by_entity.count()      # T_i
        entity_vars = by_entity.mean()         # sigma²_i
        n_entities = len(entity_vars)

        # Pooled variance: mean squared residual over the whole sample.
        pooled_var = float(resid_sq.mean())

        # V_i: estimated variance of sigma²_i.
        sq_dev = (resid_sq - by_entity.transform('mean')) ** 2
        var_of_var = sq_dev.groupby(entity).sum() / (
            entity_counts * (entity_counts - 1)
        )

        # Entities with T_i < 2 or V_i == 0 give an undefined term
        # (division by zero), so they are excluded from the sum.
        usable = (entity_counts > 1) & (var_of_var > 0)
        n_used = int(usable.sum())

        if n_used > 0:
            terms = (entity_vars[usable] - pooled_var) ** 2 / var_of_var[usable]
            wald_stat = float(terms.sum())
        else:
            wald_stat = float('nan')

        # Under H0 the statistic is approximately chi2 with one degree of
        # freedom per contributing entity.
        df = max(n_used, 1)
        # Survival function keeps precision for very small p-values.
        pvalue = float(stats.chi2.sf(wald_stat, df))

        min_var = entity_vars.min()
        max_var = entity_vars.max()
        metadata = {
            'n_entities': n_entities,
            'n_entities_used': n_used,
            'pooled_variance': pooled_var,
            'min_entity_var': min_var,
            'max_entity_var': max_var,
            'variance_ratio': max_var / min_var if min_var > 0 else np.inf
        }

        return ValidationTestResult(
            test_name="Modified Wald Test for Groupwise Heteroskedasticity",
            statistic=wald_stat,
            pvalue=pvalue,
            null_hypothesis="Homoskedasticity (constant variance across entities)",
            alternative_hypothesis="Groupwise heteroskedasticity present",
            alpha=alpha,
            df=df,
            metadata=metadata
        )

    def _prepare_residual_data(self) -> pd.DataFrame:
        """
        Prepare residual data with entity identifiers.

        Returns
        -------
        pd.DataFrame
            DataFrame with columns: entity, resid

        Raises
        ------
        AttributeError
            If the results object has no ``entity_index`` attribute
        """
        if not hasattr(self.results, 'entity_index'):
            raise AttributeError(
                "Results object must have 'entity_index' attribute. "
                "Please ensure your model stores this during estimation."
            )

        # Convert both inputs to plain 1D arrays so rows are paired by
        # position; pandas objects would otherwise be aligned on their index.
        return pd.DataFrame({
            'entity': np.asarray(self.results.entity_index).ravel(),
            'resid': np.asarray(self.resid).ravel()
        })
@@ -0,0 +1,208 @@
1
+ """
2
+ White test for heteroskedasticity in panel data.
3
+
4
+ References
5
+ ----------
6
+ White, H. (1980). A heteroskedasticity-consistent covariance matrix estimator
7
+ and a direct test for heteroskedasticity. Econometrica, 48(4), 817-838.
8
+ """
9
+
10
+ import numpy as np
11
+ import pandas as pd
12
+ from scipy import stats
13
+ from itertools import combinations_with_replacement
14
+
15
+ from panelbox.validation.base import ValidationTest, ValidationTestResult
16
+
17
+
18
class WhiteTest(ValidationTest):
    """
    White test for heteroskedasticity.

    Tests the null hypothesis of homoskedasticity against a general
    alternative of heteroskedasticity. Unlike Breusch-Pagan, this test
    does not assume a specific functional form for the heteroskedasticity.

    H0: sigma²_i = sigma² (homoskedasticity)
    H1: sigma²_i depends on regressors in a general way

    The test regresses squared residuals on the original regressors,
    their squares, and cross-products (interactions).

    Notes
    -----
    The test statistic is n*R² from the auxiliary regression, which
    follows a chi-squared distribution under the null hypothesis. The
    degrees of freedom equal the number of linearly independent auxiliary
    regressors excluding the constant.

    For models with many regressors, the test can have low power due
    to the large number of terms in the auxiliary regression.

    Examples
    --------
    >>> from panelbox.models.static.pooled_ols import PooledOLS
    >>> model = PooledOLS("y ~ x1 + x2", data, "entity", "time")
    >>> results = model.fit()
    >>>
    >>> from panelbox.validation.heteroskedasticity.white import WhiteTest
    >>> test = WhiteTest(results)
    >>> result = test.run()
    >>> print(result)
    """

    def __init__(self, results: 'PanelResults'):
        """
        Initialize White test.

        Parameters
        ----------
        results : PanelResults
            Results from panel model estimation
        """
        super().__init__(results)

        # Store the original design matrix if the model cached one.
        model = getattr(results, '_model', None)
        self._X = getattr(model, '_X_orig', None)

    def run(self, alpha: float = 0.05, cross_terms: bool = True) -> ValidationTestResult:
        """
        Run White test for heteroskedasticity.

        Parameters
        ----------
        alpha : float, default=0.05
            Significance level
        cross_terms : bool, default=True
            If True, include cross-product terms in auxiliary regression
            If False, only include squared terms (less computationally intensive)

        Returns
        -------
        ValidationTestResult
            Test results

        Raises
        ------
        ValueError
            If the design matrix is not available, is empty, or does not
            have one row per residual
        """
        # Flatten so that an (n, 1) residual array behaves like a vector.
        resid_sq = np.asarray(self.resid, dtype=float).ravel() ** 2

        X = self._get_design_matrix()
        if X is None:
            raise ValueError(
                "Design matrix not available. "
                "White test requires access to the original regressors."
            )

        X = np.asarray(X, dtype=float)
        if X.ndim == 1:
            X = X.reshape(-1, 1)

        n = resid_sq.shape[0]
        if n == 0:
            raise ValueError("White test requires at least one observation.")
        if X.shape[0] != n:
            raise ValueError(
                f"Design matrix has {X.shape[0]} rows but there are {n} residuals."
            )

        k_orig = X.shape[1]

        # Drop every constant column, wherever it sits: squaring or
        # interacting a constant only duplicates existing terms. A single
        # intercept is added back below.
        is_constant = np.array(
            [np.allclose(X[:, j], X[0, j]) for j in range(k_orig)],
            dtype=bool
        )
        X_vars = X[:, ~is_constant]
        k_vars = X_vars.shape[1]

        # Rescale each regressor to unit max-abs. This leaves the fitted
        # values (and hence R²) unchanged but keeps squares and products on
        # a comparable scale, which stabilises the rank computation.
        if k_vars > 0:
            X_vars = X_vars / np.max(np.abs(X_vars), axis=0)

        # Auxiliary regressors: constant, levels, squares, cross-products.
        blocks = [np.ones((n, 1)), X_vars, X_vars ** 2]
        if cross_terms and k_vars > 1:
            # Strict upper triangle = each pair (i, j) with i < j once.
            left, right = np.triu_indices(k_vars, k=1)
            blocks.append(X_vars[:, left] * X_vars[:, right])
        X_aux = np.hstack(blocks)

        # lstsq handles collinear terms (e.g. the square of a 0/1 dummy)
        # and reports the numerical rank used for the degrees of freedom.
        beta_aux, _, rank, _ = np.linalg.lstsq(X_aux, resid_sq, rcond=None)
        fitted_aux = X_aux @ beta_aux

        # R² of the auxiliary regression, clipped to absorb rounding noise.
        mean_resid_sq = resid_sq.mean()
        sst = np.sum((resid_sq - mean_resid_sq) ** 2)
        ssr = np.sum((resid_sq - fitted_aux) ** 2)
        R2_aux = float(np.clip(1.0 - ssr / sst, 0.0, 1.0)) if sst > 0 else 0.0

        # LM statistic
        lm_stat = n * R2_aux

        # Degrees of freedom = independent auxiliary regressors minus the
        # constant (at least 1 so the chi-squared distribution is defined).
        df = max(int(rank) - 1, 1)

        # Survival function keeps precision for very small p-values.
        pvalue = float(stats.chi2.sf(lm_stat, df))

        metadata = {
            'R2_auxiliary': R2_aux,
            'n_obs': n,
            'n_original_regressors': k_orig,
            'n_auxiliary_terms': X_aux.shape[1],
            'includes_cross_terms': cross_terms
        }

        return ValidationTestResult(
            test_name="White Test for Heteroskedasticity",
            statistic=lm_stat,
            pvalue=pvalue,
            null_hypothesis="Homoskedasticity (constant error variance)",
            alternative_hypothesis="General heteroskedasticity",
            alpha=alpha,
            df=df,
            metadata=metadata
        )

    def _get_design_matrix(self) -> np.ndarray:
        """
        Get the design matrix X.

        Returns
        -------
        np.ndarray or None
            Design matrix, or None if not available
        """
        # Matrix captured at construction time, if any.
        if self._X is not None:
            return self._X

        # Otherwise try to rebuild it from the model's formula and data.
        model = getattr(self.results, '_model', None)
        if model is None:
            return None
        if not (hasattr(model, 'formula_parser') and hasattr(model, 'data')):
            return None

        try:
            _, X = model.formula_parser.build_design_matrices(
                model.data.data,
                return_type='array'
            )
        except Exception as exc:
            # Best effort: the caller reports the missing matrix, but the
            # underlying reason is surfaced instead of silently discarded.
            import warnings
            warnings.warn(
                f"Could not rebuild design matrix: {exc}",
                RuntimeWarning,
                stacklevel=2
            )
            return None

        return X
File without changes
File without changes
@@ -0,0 +1,13 @@
1
+ """
2
+ Serial correlation tests for panel models.
3
+ """
4
+
5
+ from panelbox.validation.serial_correlation.wooldridge_ar import WooldridgeARTest
6
+ from panelbox.validation.serial_correlation.breusch_godfrey import BreuschGodfreyTest
7
+ from panelbox.validation.serial_correlation.baltagi_wu import BaltagiWuTest
8
+
9
+ __all__ = [
10
+ 'WooldridgeARTest',
11
+ 'BreuschGodfreyTest',
12
+ 'BaltagiWuTest',
13
+ ]