panelbox 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. panelbox/__init__.py +67 -0
  2. panelbox/__version__.py +14 -0
  3. panelbox/cli/__init__.py +0 -0
  4. panelbox/cli/{commands}/__init__.py +0 -0
  5. panelbox/core/__init__.py +0 -0
  6. panelbox/core/base_model.py +164 -0
  7. panelbox/core/formula_parser.py +318 -0
  8. panelbox/core/panel_data.py +387 -0
  9. panelbox/core/results.py +366 -0
  10. panelbox/datasets/__init__.py +0 -0
  11. panelbox/datasets/{data}/__init__.py +0 -0
  12. panelbox/gmm/__init__.py +65 -0
  13. panelbox/gmm/difference_gmm.py +645 -0
  14. panelbox/gmm/estimator.py +562 -0
  15. panelbox/gmm/instruments.py +580 -0
  16. panelbox/gmm/results.py +550 -0
  17. panelbox/gmm/system_gmm.py +621 -0
  18. panelbox/gmm/tests.py +535 -0
  19. panelbox/models/__init__.py +11 -0
  20. panelbox/models/dynamic/__init__.py +0 -0
  21. panelbox/models/iv/__init__.py +0 -0
  22. panelbox/models/static/__init__.py +13 -0
  23. panelbox/models/static/fixed_effects.py +516 -0
  24. panelbox/models/static/pooled_ols.py +298 -0
  25. panelbox/models/static/random_effects.py +512 -0
  26. panelbox/report/__init__.py +61 -0
  27. panelbox/report/asset_manager.py +410 -0
  28. panelbox/report/css_manager.py +472 -0
  29. panelbox/report/exporters/__init__.py +15 -0
  30. panelbox/report/exporters/html_exporter.py +440 -0
  31. panelbox/report/exporters/latex_exporter.py +510 -0
  32. panelbox/report/exporters/markdown_exporter.py +446 -0
  33. panelbox/report/renderers/__init__.py +11 -0
  34. panelbox/report/renderers/static/__init__.py +0 -0
  35. panelbox/report/renderers/static_validation_renderer.py +341 -0
  36. panelbox/report/report_manager.py +502 -0
  37. panelbox/report/template_manager.py +337 -0
  38. panelbox/report/transformers/__init__.py +0 -0
  39. panelbox/report/transformers/static/__init__.py +0 -0
  40. panelbox/report/validation_transformer.py +449 -0
  41. panelbox/standard_errors/__init__.py +0 -0
  42. panelbox/templates/__init__.py +0 -0
  43. panelbox/templates/assets/css/base_styles.css +382 -0
  44. panelbox/templates/assets/css/report_components.css +747 -0
  45. panelbox/templates/assets/js/tab-navigation.js +161 -0
  46. panelbox/templates/assets/js/utils.js +276 -0
  47. panelbox/templates/common/footer.html +24 -0
  48. panelbox/templates/common/header.html +44 -0
  49. panelbox/templates/common/meta.html +5 -0
  50. panelbox/templates/validation/interactive/index.html +272 -0
  51. panelbox/templates/validation/interactive/partials/charts.html +58 -0
  52. panelbox/templates/validation/interactive/partials/methodology.html +201 -0
  53. panelbox/templates/validation/interactive/partials/overview.html +146 -0
  54. panelbox/templates/validation/interactive/partials/recommendations.html +101 -0
  55. panelbox/templates/validation/interactive/partials/test_results.html +231 -0
  56. panelbox/utils/__init__.py +0 -0
  57. panelbox/utils/formatting.py +172 -0
  58. panelbox/utils/matrix_ops.py +233 -0
  59. panelbox/utils/statistical.py +173 -0
  60. panelbox/validation/__init__.py +58 -0
  61. panelbox/validation/base.py +175 -0
  62. panelbox/validation/cointegration/__init__.py +0 -0
  63. panelbox/validation/cross_sectional_dependence/__init__.py +13 -0
  64. panelbox/validation/cross_sectional_dependence/breusch_pagan_lm.py +222 -0
  65. panelbox/validation/cross_sectional_dependence/frees.py +297 -0
  66. panelbox/validation/cross_sectional_dependence/pesaran_cd.py +188 -0
  67. panelbox/validation/heteroskedasticity/__init__.py +13 -0
  68. panelbox/validation/heteroskedasticity/breusch_pagan.py +222 -0
  69. panelbox/validation/heteroskedasticity/modified_wald.py +172 -0
  70. panelbox/validation/heteroskedasticity/white.py +208 -0
  71. panelbox/validation/instruments/__init__.py +0 -0
  72. panelbox/validation/robustness/__init__.py +0 -0
  73. panelbox/validation/serial_correlation/__init__.py +13 -0
  74. panelbox/validation/serial_correlation/baltagi_wu.py +220 -0
  75. panelbox/validation/serial_correlation/breusch_godfrey.py +260 -0
  76. panelbox/validation/serial_correlation/wooldridge_ar.py +200 -0
  77. panelbox/validation/specification/__init__.py +16 -0
  78. panelbox/validation/specification/chow.py +273 -0
  79. panelbox/validation/specification/hausman.py +264 -0
  80. panelbox/validation/specification/mundlak.py +331 -0
  81. panelbox/validation/specification/reset.py +273 -0
  82. panelbox/validation/unit_root/__init__.py +0 -0
  83. panelbox/validation/validation_report.py +257 -0
  84. panelbox/validation/validation_suite.py +401 -0
  85. panelbox-0.2.0.dist-info/METADATA +337 -0
  86. panelbox-0.2.0.dist-info/RECORD +90 -0
  87. panelbox-0.2.0.dist-info/WHEEL +5 -0
  88. panelbox-0.2.0.dist-info/entry_points.txt +2 -0
  89. panelbox-0.2.0.dist-info/licenses/LICENSE +21 -0
  90. panelbox-0.2.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,331 @@
1
+ """
2
+ Mundlak test for Random Effects specification.
3
+
4
+ References
5
+ ----------
6
+ Mundlak, Y. (1978). On the pooling of time series and cross section data.
7
+ Econometrica, 46(1), 69-85.
8
+
9
+ Wooldridge, J. M. (2010). Econometric Analysis of Cross Section and Panel Data
10
+ (2nd ed.). MIT Press.
11
+ """
12
+
13
+ import numpy as np
14
+ import pandas as pd
15
+ from scipy import stats
16
+
17
+ from panelbox.validation.base import ValidationTest, ValidationTestResult
18
+
19
+
20
class MundlakTest(ValidationTest):
    """
    Mundlak test for Random Effects specification.

    Tests whether the random effects assumption that entity effects are
    uncorrelated with the regressors is valid.

    H0: Cov(u_i, X_it) = 0 (RE is appropriate)
    H1: Cov(u_i, X_it) ≠ 0 (use FE instead)

    The test augments the model with the entity-level time averages of the
    time-varying regressors and tests if their coefficients are jointly zero.

    Notes
    -----
    This is essentially testing the same thing as the Hausman test, but
    implemented differently. If the Mundlak test rejects, it suggests
    that Fixed Effects should be used instead of Random Effects.

    The augmented regression is estimated by pooled OLS with
    entity-clustered standard errors. The reported statistic is the Wald
    chi-squared statistic on the coefficients of the time-averaged
    variables; the equivalent F-form (Wald / df) is stored in the metadata.

    Examples
    --------
    >>> from panelbox.models.static.random_effects import RandomEffects
    >>> re = RandomEffects("y ~ x1 + x2", data, "entity", "time")
    >>> results = re.fit()
    >>>
    >>> from panelbox.validation.specification.mundlak import MundlakTest
    >>> test = MundlakTest(results)
    >>> result = test.run()
    >>> print(result)
    """

    def __init__(self, results: 'PanelResults'):
        """
        Initialize Mundlak test.

        Parameters
        ----------
        results : PanelResults
            Results from panel model estimation (preferably Random Effects)
        """
        super().__init__(results)

        if 'Random Effects' not in self.model_type:
            import warnings
            # stacklevel=2 attributes the warning to the caller that built
            # the test rather than to this constructor.
            warnings.warn(
                "Mundlak test is designed for Random Effects models. "
                f"Current model: {self.model_type}",
                stacklevel=2,
            )

    def run(self, alpha: float = 0.05) -> ValidationTestResult:
        """
        Run Mundlak test for RE specification.

        Parameters
        ----------
        alpha : float, default=0.05
            Significance level

        Returns
        -------
        ValidationTestResult
            Test results. ``statistic`` is the Wald chi-squared statistic,
            ``df`` the number of group-mean terms that were tested.

        Raises
        ------
        ValueError
            If the data, formula, or regressor names cannot be recovered
            from the fitted model, if no time-varying regressor is found,
            or if the augmented model cannot be estimated.

        Notes
        -----
        The test procedure:
        1. Estimate augmented model: y_it = X_it*beta + X_i_bar*delta + e_it
           where X_i_bar are entity means of the time-varying variables
        2. Test H0: delta = 0 using a Wald test
        3. If reject, RE assumption is violated → use FE

        Implementation:
        The augmented model is fitted with pooled OLS and a cluster-robust
        covariance matrix (clustered by entity), not with the Random
        Effects estimator. Regressors that do not vary within any entity
        are left out of the group-mean terms, because their entity mean
        equals the regressor itself and would be perfectly collinear.
        """
        # Get original data, formula, and variable names
        data, formula, entity_col, time_col, var_names = self._get_data_full()

        if data is None or formula is None or var_names is None:
            raise ValueError(
                "Data, formula, and variable names required for Mundlak test. "
                "Ensure the model was estimated with a formula and panel structure."
            )

        # Create augmented dataset with group means
        data_aug = data.copy()

        # Compute entity means for each time-varying regressor.
        mean_vars = []
        time_invariant_vars = []
        for var in var_names:
            if var not in data_aug.columns:
                # Terms that are not plain columns cannot be averaged here.
                continue

            grouped = data_aug.groupby(entity_col)[var]

            # A regressor with at most one distinct value per entity is
            # time-invariant: its group mean duplicates the regressor and
            # would make the augmented design matrix singular.
            if (grouped.nunique() <= 1).all():
                time_invariant_vars.append(var)
                continue

            mean_col_name = f'{var}_mean'
            data_aug[mean_col_name] = grouped.transform('mean')
            mean_vars.append(mean_col_name)

        if not mean_vars:
            raise ValueError(
                "No time-varying regressors found. "
                "Mundlak test requires at least one time-varying regressor."
            )

        # Build augmented formula: y ~ x1 + x2 + ... + x1_mean + x2_mean + ...
        dep_var = formula.split('~')[0].strip()
        augmented_formula = f"{dep_var} ~ {' + '.join([*var_names, *mean_vars])}"

        # Estimate augmented model with cluster-robust SE
        # NOTE: We use Pooled OLS with clustered SE instead of RE because
        # the PanelBox RE implementation has numerical issues with variables
        # that are constant within-group (like group means).
        try:
            from panelbox.models.static.pooled_ols import PooledOLS

            model_augmented = PooledOLS(
                augmented_formula,
                data_aug,
                entity_col,
                time_col
            )
            # Use cluster-robust SE (clustered by entity)
            aug_results = model_augmented.fit(
                cov_type='clustered',
                cov_kwds={'groups': entity_col}
            )

        except Exception as e:
            # Keep the original cause attached for debugging.
            raise ValueError(
                f"Failed to estimate augmented model: {e}"
            ) from e

        # Locate the group-mean coefficients (delta) in the fitted model
        k_vars = len(mean_vars)
        param_names = list(aug_results.params.index)
        mean_var_set = set(mean_vars)
        mean_indices = [
            i for i, name in enumerate(param_names) if name in mean_var_set
        ]

        if len(mean_indices) != k_vars:
            raise ValueError(
                f"Expected {k_vars} mean coefficients, found {len(mean_indices)}"
            )

        # Label coefficients by the fitted parameter order so names and
        # values cannot get out of step with each other.
        delta_names = [param_names[i] for i in mean_indices]
        delta = np.asarray(
            aug_results.params.iloc[mean_indices].values, dtype=float
        )

        # Cluster-robust variance-covariance block for delta
        vcov_full = aug_results.cov_params
        vcov_delta = np.asarray(
            vcov_full.iloc[mean_indices, mean_indices].values, dtype=float
        )

        # Wald test: delta' Var(delta)^{-1} delta ~ Chi2(k_vars)
        try:
            vcov_delta_inv = np.linalg.inv(vcov_delta)
        except np.linalg.LinAlgError:
            # Fall back to the pseudo-inverse for a singular block.
            vcov_delta_inv = np.linalg.pinv(vcov_delta)

        wald_stat = float(delta @ vcov_delta_inv @ delta)

        # Degrees of freedom
        df = k_vars

        # Survival function keeps precision for very small p-values,
        # where 1 - cdf rounds to zero.
        pvalue = float(stats.chi2.sf(wald_stat, df))

        # Metadata
        delta_dict = {
            name: float(value) for name, value in zip(delta_names, delta)
        }

        # Standard errors for reference
        se_delta = np.sqrt(np.diag(vcov_delta))
        se_dict = {
            name: float(value) for name, value in zip(delta_names, se_delta)
        }

        metadata = {
            'n_time_varying_vars': k_vars,
            'delta_coefficients': delta_dict,
            'standard_errors': se_dict,
            'F_statistic': wald_stat / df if df > 0 else 0.0,
            'augmented_formula': augmented_formula,
            'implementation': 'Pooled OLS with cluster-robust SE (entity-clustered)',
            'time_invariant_vars': time_invariant_vars
        }

        result = ValidationTestResult(
            test_name="Mundlak Test for RE Specification",
            statistic=wald_stat,
            pvalue=pvalue,
            null_hypothesis="RE is consistent (entity effects uncorrelated with regressors)",
            alternative_hypothesis="RE is inconsistent (use Fixed Effects)",
            alpha=alpha,
            df=df,
            metadata=metadata
        )

        return result

    def _get_data_full(self):
        """
        Get full data including DataFrame, formula, and variable names.

        Returns
        -------
        tuple
            (data, formula, entity_col, time_col, var_names) or
            (None, None, None, None, None) if not available

        Notes
        -----
        This method extracts:
        - data: copy of ``model.data.data``
        - formula: ``model.formula`` (e.g., "y ~ x1 + x2")
        - entity_col: ``model.data.entity_col``
        - time_col: ``model.data.time_col``
        - var_names: List of regressor names (excluding constant), taken
          from ``model.formula_parser.rhs_terms`` when present, otherwise
          parsed from the formula string by splitting on '+'

        Any exception raised while reading these attributes is treated as
        "not available" and yields the all-None tuple.
        """
        unavailable = (None, None, None, None, None)

        if not hasattr(self.results, '_model'):
            return unavailable

        model = self.results._model

        if not (hasattr(model, 'formula_parser') and hasattr(model, 'data')):
            return unavailable

        try:
            # Get original data
            data = model.data.data.copy()

            # Get entity and time columns
            entity_col = model.data.entity_col
            time_col = model.data.time_col

            # Get formula
            if not hasattr(model, 'formula'):
                return unavailable
            formula = model.formula

            if hasattr(model.formula_parser, 'rhs_terms'):
                # Get RHS terms (excluding Intercept)
                var_names = [
                    term for term in model.formula_parser.rhs_terms
                    if term.lower() not in ['intercept', '1']
                ]
            else:
                # Fallback: parse formula manually
                # Format: "y ~ x1 + x2 + ..."
                rhs = formula.split('~')[1].strip()
                terms = [t.strip() for t in rhs.split('+')]
                var_names = [
                    t for t in terms
                    if t.lower() not in ['1', 'intercept', '']
                ]

            return data, formula, entity_col, time_col, var_names

        except Exception:
            # Best-effort accessor: callers check for None and raise.
            return unavailable

    def _get_data(self):
        """
        Get design matrix, dependent variable, and entity indices.

        Returns
        -------
        tuple
            (X, y, entities) or (None, None, None) if not available

        Notes
        -----
        This is a legacy method kept for compatibility.
        New code should use _get_data_full() instead.
        """
        if not hasattr(self.results, '_model'):
            return None, None, None

        model = self.results._model

        if not (hasattr(model, 'formula_parser') and hasattr(model, 'data')):
            return None, None, None

        try:
            y, X = model.formula_parser.build_design_matrices(
                model.data.data,
                return_type='array'
            )

            entities = model.data.data[model.data.entity_col].values.ravel()

            return X, y.ravel(), entities

        except Exception:
            # Best-effort accessor: any failure means "not available".
            return None, None, None
@@ -0,0 +1,273 @@
1
+ """
2
+ RESET test for specification errors in panel data models.
3
+
4
+ RESET = Regression Equation Specification Error Test
5
+
6
+ References
7
+ ----------
8
+ Ramsey, J. B. (1969). Tests for Specification Errors in Classical Linear
9
+ Least Squares Regression Analysis. Journal of the Royal Statistical Society,
10
+ Series B, 31(2), 350-371.
11
+
12
+ Wooldridge, J. M. (2010). Econometric Analysis of Cross Section and Panel Data
13
+ (2nd ed.). MIT Press.
14
+ """
15
+
16
+ import numpy as np
17
+ import pandas as pd
18
+ from scipy import stats
19
+
20
+ from panelbox.validation.base import ValidationTest, ValidationTestResult
21
+
22
+
23
class RESETTest(ValidationTest):
    """
    RESET test for functional form specification.

    Tests the null hypothesis that the model is correctly specified
    (linear functional form is appropriate) against the alternative
    that nonlinear terms are needed.

    H0: E[y | X] = X*beta (linear specification is correct)
    H1: E[y | X] includes higher-order terms of fitted values

    The test augments the model with powers of fitted values (ŷ², ŷ³, ...)
    and tests if these terms are jointly significant.

    Notes
    -----
    The test is implemented by:
    1. Taking the fitted values ŷ from the supplied results
    2. Augmenting model: y = X*beta + gamma2*ŷ² + gamma3*ŷ³ + ... + u
    3. Testing H0: gamma2 = gamma3 = ... = 0 using an F-test

    For panel data, we use pooled OLS with cluster-robust standard errors
    to account for within-group correlation.

    Common practice is to include powers 2 and 3 (default).

    Examples
    --------
    >>> from panelbox.models.static.pooled_ols import PooledOLS
    >>> model = PooledOLS("y ~ x1 + x2", data, "entity", "time")
    >>> results = model.fit()
    >>>
    >>> from panelbox.validation.specification.reset import RESETTest
    >>> test = RESETTest(results)
    >>> result = test.run(powers=[2, 3])  # Test with ŷ² and ŷ³
    >>> print(result)
    """

    def __init__(self, results: 'PanelResults'):
        """
        Initialize RESET test.

        Parameters
        ----------
        results : PanelResults
            Results from panel model estimation
        """
        super().__init__(results)

    def run(self, powers=None, alpha: float = 0.05) -> ValidationTestResult:
        """
        Run RESET test for specification errors.

        Parameters
        ----------
        powers : list of int, optional
            Powers of fitted values to include in augmented regression.
            Default is [2, 3] (quadratic and cubic terms). Python and
            NumPy integers are accepted; repeated powers are used once.
        alpha : float, default=0.05
            Significance level

        Returns
        -------
        ValidationTestResult
            Test results. ``statistic`` is the F-form of the Wald statistic
            (Wald / number of power terms) and ``df`` is the tuple
            (numerator df, denominator df).

        Raises
        ------
        ValueError
            If powers are empty or invalid, if data, formula, or fitted
            values are not available, or if the augmented model cannot be
            estimated.

        Notes
        -----
        The test uses an F-statistic for testing joint significance of
        the polynomial terms, built from the cluster-robust covariance
        matrix of the augmented pooled OLS fit. The chi-squared p-value
        of the underlying Wald statistic is stored in the metadata.
        """
        if powers is None:
            powers = [2, 3]
        powers = list(powers)

        # Validate powers. An empty list must be rejected explicitly:
        # all() over an empty sequence is True, which would otherwise let
        # it through to a malformed formula and a zero numerator df.
        if not powers:
            raise ValueError(
                "At least one power is required. Powers must be integers >= 2"
            )
        if not all(
            isinstance(p, (int, np.integer))
            and not isinstance(p, bool)
            and p >= 2
            for p in powers
        ):
            raise ValueError("Powers must be integers >= 2")

        # Drop repeated powers (order preserved); duplicates would add the
        # same regressor twice and make the covariance block singular.
        powers = list(dict.fromkeys(int(p) for p in powers))

        # Get data
        data, formula, entity_col, time_col, var_names = self._get_data_full()

        if data is None or formula is None:
            raise ValueError(
                "Data and formula required for RESET test. "
                "Ensure the model was estimated with a formula."
            )

        # Get fitted values from original model
        fitted = self.results.fitted_values
        if fitted is None:
            raise ValueError("Fitted values not available from model results")

        # Create augmented dataset with powers of fitted values
        data_aug = data.copy()
        power_vars = []

        for power in powers:
            var_name = f'fitted_pow{power}'
            data_aug[var_name] = fitted ** power
            power_vars.append(var_name)

        # Build augmented formula. Joining one combined list keeps the
        # formula well-formed even when there are no original regressors.
        dep_var = formula.split('~')[0].strip()
        augmented_formula = f"{dep_var} ~ {' + '.join([*var_names, *power_vars])}"

        # Estimate augmented model with cluster-robust SE
        try:
            from panelbox.models.static.pooled_ols import PooledOLS

            model_aug = PooledOLS(
                augmented_formula,
                data_aug,
                entity_col,
                time_col
            )
            results_aug = model_aug.fit(
                cov_type='clustered',
                cov_kwds={'groups': entity_col}
            )

        except Exception as e:
            # Keep the original cause attached for debugging.
            raise ValueError(f"Failed to estimate augmented model: {e}") from e

        # Extract coefficients on power terms
        gamma = np.asarray(results_aug.params[power_vars].values, dtype=float)

        # Extract variance-covariance matrix for power terms
        vcov_gamma = np.asarray(
            results_aug.cov_params.loc[power_vars, power_vars].values,
            dtype=float
        )

        # Wald test: gamma' * Vcov(gamma)^-1 * gamma ~ Chi2(k)
        # where k = number of power terms
        try:
            vcov_inv = np.linalg.inv(vcov_gamma)
        except np.linalg.LinAlgError:
            # Fall back to the pseudo-inverse for a singular block.
            vcov_inv = np.linalg.pinv(vcov_gamma)

        wald_stat = float(gamma @ vcov_inv @ gamma)

        # Degrees of freedom
        df_num = len(power_vars)
        df_denom = results_aug.nobs - results_aug.params.shape[0]

        # Convert to F-statistic
        f_stat = wald_stat / df_num

        # Survival functions keep precision for very small p-values,
        # where 1 - cdf rounds to zero.
        pvalue = float(stats.f.sf(f_stat, df_num, df_denom))

        # Alternative: chi-squared approximation of the Wald statistic
        pvalue_chi2 = float(stats.chi2.sf(wald_stat, df_num))

        # Metadata
        gamma_dict = {
            name: float(value) for name, value in zip(power_vars, gamma)
        }

        se_gamma = np.sqrt(np.diag(vcov_gamma))
        se_dict = {
            name: float(value) for name, value in zip(power_vars, se_gamma)
        }

        metadata = {
            'powers': powers,
            'gamma_coefficients': gamma_dict,
            'standard_errors': se_dict,
            'wald_statistic': wald_stat,
            'F_statistic': f_stat,
            'df_numerator': df_num,
            'df_denominator': df_denom,
            'pvalue_chi2': pvalue_chi2,
            'augmented_formula': augmented_formula
        }

        result = ValidationTestResult(
            test_name="RESET Test for Specification",
            statistic=f_stat,
            pvalue=pvalue,
            null_hypothesis="Model is correctly specified (linear functional form)",
            alternative_hypothesis="Nonlinear terms needed (specification error)",
            alpha=alpha,
            df=(df_num, df_denom),
            metadata=metadata
        )

        return result

    def _get_data_full(self):
        """
        Get full data including DataFrame, formula, and variable names.

        Returns
        -------
        tuple
            (data, formula, entity_col, time_col, var_names) or
            (None, None, None, None, None) if not available

        Notes
        -----
        ``data`` is a copy of ``model.data.data``. ``var_names`` comes from
        ``model.formula_parser.rhs_terms`` when present, otherwise from
        splitting the right-hand side of the formula on '+'; intercept
        terms are excluded in both cases. Any exception raised while
        reading these attributes yields the all-None tuple.
        """
        unavailable = (None, None, None, None, None)

        if not hasattr(self.results, '_model'):
            return unavailable

        model = self.results._model

        if not (hasattr(model, 'formula_parser') and hasattr(model, 'data')):
            return unavailable

        try:
            # Get original data
            data = model.data.data.copy()

            # Get entity and time columns
            entity_col = model.data.entity_col
            time_col = model.data.time_col

            # Get formula
            if not hasattr(model, 'formula'):
                return unavailable
            formula = model.formula

            # Extract variable names from formula
            if hasattr(model.formula_parser, 'rhs_terms'):
                var_names = [
                    term for term in model.formula_parser.rhs_terms
                    if term.lower() not in ['intercept', '1']
                ]
            else:
                rhs = formula.split('~')[1].strip()
                terms = [t.strip() for t in rhs.split('+')]
                var_names = [
                    t for t in terms
                    if t.lower() not in ['1', 'intercept', '']
                ]

            return data, formula, entity_col, time_col, var_names

        except Exception:
            # Best-effort accessor: callers check for None and raise.
            return unavailable
File without changes